From 433cceab1fc21a3622fa50343e2dea82b48a3ade Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Mon, 11 Dec 2023 16:50:23 +0100 Subject: [PATCH 1/5] added derive_precursor_mz_from_parent_mass filter --- tools/matchms/matchms_filtering.xml | 24 +++++++++ tools/matchms/matchms_filtering_wrapper.py | 10 ++++ .../filtering/derive_precursor_mz.msp | 48 ++++++++++++++++++ .../filtering/derive_precursor_mz_out.msp | 50 +++++++++++++++++++ 4 files changed, 132 insertions(+) create mode 100644 tools/matchms/test-data/filtering/derive_precursor_mz.msp create mode 100644 tools/matchms/test-data/filtering/derive_precursor_mz_out.msp diff --git a/tools/matchms/matchms_filtering.xml b/tools/matchms/matchms_filtering.xml index 64037f25..23d75fc4 100644 --- a/tools/matchms/matchms_filtering.xml +++ b/tools/matchms/matchms_filtering.xml @@ -55,6 +55,10 @@ #if $require_inchi_is_true == "TRUE" -require_inchi \ #end if + #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" + -derive_precursor_mz_from_parent_mass \ + --estimate_from_adduct "$derive_precursor_mz_from_parent_mass.estimate_from_adduct" \ + #end if #if $reduce_to_top_n_peaks.is_true == "TRUE" -reduce_to_top_n_peaks \ --n_max "$reduce_to_top_n_peaks.n_max" \ @@ -104,6 +108,18 @@ + + + + + + + + + + + @@ -174,6 +190,14 @@ + + +
+ + +
+ +
diff --git a/tools/matchms/matchms_filtering_wrapper.py b/tools/matchms/matchms_filtering_wrapper.py index a27c96b9..be3db93e 100644 --- a/tools/matchms/matchms_filtering_wrapper.py +++ b/tools/matchms/matchms_filtering_wrapper.py @@ -6,6 +6,7 @@ add_retention_index, add_retention_time, clean_compound_name from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ select_by_relative_intensity +from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass from matchms.importing import load_from_mgf, load_from_msp @@ -39,6 +40,9 @@ def main(argv): help="Remove spectra that does not contain SMILES.") parser.add_argument("-require_inchi", action='store_true', help="Remove spectra that does not contain INCHI.") + parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true', + help="Derives the precursor_mz from the parent mass and adduct or charge.") + parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.") parser.add_argument("-reduce_to_top_n_peaks", action='store_true', help="reduce to top n peaks filter.") parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.") @@ -51,6 +55,7 @@ def main(argv): or args.mz_range or args.require_smiles or args.require_inchi + or args.derive_precursor_mz_from_parent_mass or args.reduce_to_top_n_peaks): raise ValueError('No filter selected.') @@ -84,6 +89,11 @@ def main(argv): if args.reduce_to_top_n_peaks: spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) + if args.derive_precursor_mz_from_parent_mass: + spectrum.set("parent_mass", int(float(spectrum.get('parent_mass')))) + precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct) + spectrum.set("precursor_mz", precursor_mz) + if args.require_smiles and spectrum is not None: spectrum = require_key(spectrum, "smiles") diff --git a/tools/matchms/test-data/filtering/derive_precursor_mz.msp b/tools/matchms/test-data/filtering/derive_precursor_mz.msp new file mode 100644 index 00000000..27da139f --- /dev/null +++ b/tools/matchms/test-data/filtering/derive_precursor_mz.msp @@ -0,0 +1,48 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177.070224 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 + diff --git a/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp b/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp new file mode 100644 index 00000000..e4089de5 --- /dev/null +++ b/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp @@ -0,0 +1,50 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251 +PRECURSOR_MZ: 250.99945142009076 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177 +PRECURSOR_MZ: 176.99945142009076 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 + From 35ee098fe695f137524bc8378f40a8746193be2a Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Mon, 11 Dec 2023 17:14:23 +0100 Subject: [PATCH 2/5] bump galaxy version --- tools/matchms/matchms_filtering.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/matchms/matchms_filtering.xml b/tools/matchms/matchms_filtering.xml index 23d75fc4..780cdf1f 100644 --- a/tools/matchms/matchms_filtering.xml +++ b/tools/matchms/matchms_filtering.xml @@ -1,4 +1,4 @@ - + filter and normalize mass spectrometry data From 9e579c453953884d3c9bc548a44bbf0da6449b04 Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Tue, 12 Dec 2023 10:59:26 +0100 Subject: [PATCH 3/5] removed conversion to int --- tools/matchms/matchms_filtering_wrapper.py | 2 +- .../test-data/filtering/derive_precursor_mz_out.msp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/matchms/matchms_filtering_wrapper.py b/tools/matchms/matchms_filtering_wrapper.py index be3db93e..906eb563 100644 --- a/tools/matchms/matchms_filtering_wrapper.py +++ b/tools/matchms/matchms_filtering_wrapper.py @@ -90,7 +90,7 @@ def main(argv): spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) if args.derive_precursor_mz_from_parent_mass: - spectrum.set("parent_mass", int(float(spectrum.get('parent_mass')))) + spectrum.set("parent_mass", float(spectrum.get('parent_mass'))) precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct) spectrum.set("precursor_mz", precursor_mz) diff --git a/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp b/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp index e4089de5..c885bc13 100644 --- a/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp +++ b/tools/matchms/test-data/filtering/derive_precursor_mz_out.msp @@ -15,8 +15,8 @@ ADDUCT: [M]+ COLLISION_ENERGY: 70eV INSTRUMENT_TYPE: GC-EI-Orbitrap CHARGE: 1 -PARENT_MASS: 251 -PRECURSOR_MZ: 250.99945142009076 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 NUM PEAKS: 3 250.07765 0.3282529462971431 252.09323 1.0 @@ -39,8 +39,8 @@ ADDUCT: [M]+ COLLISION_ENERGY: 70eV INSTRUMENT_TYPE: GC-EI-Orbitrap CHARGE: 1 -PARENT_MASS: 177 -PRECURSOR_MZ: 176.99945142009076 +PARENT_MASS: 177.070224 +PRECURSOR_MZ: 177.06967542009076 NUM PEAKS: 5 152.0619 0.1657993569424221 176.062 0.24558560966311757 From 4b26c79f7339bb25506a6a5cbe8c53ef2e2fe482 Mon Sep 17 00:00:00 2001 From: Helge Hecht Date: Tue, 12 Dec 2023 10:25:59 +0000 Subject: [PATCH 4/5] Update tools/matchms/matchms_filtering.xml --- tools/matchms/matchms_filtering.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/matchms/matchms_filtering.xml b/tools/matchms/matchms_filtering.xml index 780cdf1f..68e785c0 100644 --- a/tools/matchms/matchms_filtering.xml +++ b/tools/matchms/matchms_filtering.xml @@ -55,9 +55,9 @@ #if $require_inchi_is_true == "TRUE" -require_inchi \ #end if - #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" + #if ${derive_precursor_mz_from_parent_mass.is_true} == "TRUE" -derive_precursor_mz_from_parent_mass \ - --estimate_from_adduct "$derive_precursor_mz_from_parent_mass.estimate_from_adduct" \ + --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \ #end if #if $reduce_to_top_n_peaks.is_true == "TRUE" -reduce_to_top_n_peaks \ From 94656ff1f209901a8451e8e088be150958f0027b Mon Sep 17 00:00:00 2001 From: Zargham Ahmad Date: Tue, 12 Dec 2023 13:16:26 +0100 Subject: [PATCH 5/5] removed brackets --- tools/matchms/matchms_filtering.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/matchms/matchms_filtering.xml b/tools/matchms/matchms_filtering.xml index 68e785c0..f76443dc 100644 --- a/tools/matchms/matchms_filtering.xml +++ b/tools/matchms/matchms_filtering.xml @@ -55,7 +55,7 @@ #if $require_inchi_is_true == "TRUE" -require_inchi \ #end if - #if ${derive_precursor_mz_from_parent_mass.is_true} == "TRUE" + #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" -derive_precursor_mz_from_parent_mass \ --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \ #end if