Skip to content

Commit

Permalink
Merge pull request RECETOX#475 from zargham-ahmad/issue457
Browse files: browse the repository at this point in the history
matchms_filtering: Added derive_precursor_mz_from_parent_mass filter
  • Loading branch information
hechth authored Dec 14, 2023
2 parents 970c7dc + 94656ff commit 98223db
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 1 deletion.
26 changes: 25 additions & 1 deletion tools/matchms/matchms_filtering.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09">
<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09">
<description>filter and normalize mass spectrometry data</description>

<macros>
Expand Down Expand Up @@ -55,6 +55,10 @@
#if $require_inchi_is_true == "TRUE"
-require_inchi \
#end if
#if $derive_precursor_mz_from_parent_mass.is_true == "TRUE"
-derive_precursor_mz_from_parent_mass \
--estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \
#end if
#if $reduce_to_top_n_peaks.is_true == "TRUE"
-reduce_to_top_n_peaks \
--n_max "$reduce_to_top_n_peaks.n_max" \
Expand Down Expand Up @@ -104,6 +108,18 @@
<param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
help="Remove spectra that does not contain INCHI." />

<!--
Optional filter: derive precursor_mz from parent_mass.
The is_true select defaults to FALSE, which exposes no further inputs.
Choosing TRUE reveals the estimate_from_adduct checkbox (unchecked by default,
rendered as the literal text TRUE or FALSE).
The command template adds the derive_precursor_mz_from_parent_mass flag and
forwards the checkbox value through the estimate_from_adduct option only when
is_true is TRUE.
NOTE(review): the wrapper declares estimate_from_adduct with type=str and hands
the raw string to the matchms filter. A non-empty string such as FALSE is truthy
in Python, so confirm that the unchecked state really disables adduct-based
estimation.
-->
<conditional name="derive_precursor_mz_from_parent_mass">
<param name="is_true" label="Derive precursor_mz from parent_mass" type="select"
help="Derives the precursor_mz from the parent mass and adduct or charge.">
<option value="FALSE" selected="true">FALSE</option>
<option value="TRUE">TRUE</option>
</param>
<when value="TRUE">
<param label="Estimate from adduct" name="estimate_from_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" />
</when>
<when value="FALSE"></when>
</conditional>

<conditional name="reduce_to_top_n_peaks">
<param name="is_true" label="Reduce to top n peaks" type="select"
help="Lowest intensity peaks will be removed when it has more peaks than desired.">
Expand Down Expand Up @@ -174,6 +190,14 @@
</section>
<output name="output" file="filtering/reduce_to_top_n_peaks.msp" ftype="msp"/>
</test>
<!--
Functional test for the derive_precursor_mz_from_parent_mass filter.
Input: filtering/derive_precursor_mz.msp, two spectra that carry PARENT_MASS,
ADDUCT and CHARGE but no PRECURSOR_MZ field.
Settings: filter enabled (is_true = TRUE) with estimate_from_adduct = TRUE.
Expected output: filtering/derive_precursor_mz_out.msp, the same spectra with a
PRECURSOR_MZ field added.
NOTE(review): the parameters are nested in a section element although
derive_precursor_mz_from_parent_mass is declared as a conditional in the inputs;
confirm which wrapper element is intended here.
-->
<test>
<param name="spectra" value="filtering/derive_precursor_mz.msp" ftype="msp"/>
<section name="derive_precursor_mz_from_parent_mass">
<param name="is_true" value="TRUE"/>
<param name="estimate_from_adduct" value="TRUE"/>
</section>
<output name="output" file="filtering/derive_precursor_mz_out.msp" ftype="msp"/>
</test>
</tests>

<help>
Expand Down
10 changes: 10 additions & 0 deletions tools/matchms/matchms_filtering_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
add_retention_index, add_retention_time, clean_compound_name
from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
select_by_relative_intensity
from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
from matchms.importing import load_from_mgf, load_from_msp


Expand Down Expand Up @@ -39,6 +40,9 @@ def main(argv):
help="Remove spectra that does not contain SMILES.")
parser.add_argument("-require_inchi", action='store_true',
help="Remove spectra that does not contain INCHI.")
parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
help="Derives the precursor_mz from the parent mass and adduct or charge.")
parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
help="reduce to top n peaks filter.")
parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
Expand All @@ -51,6 +55,7 @@ def main(argv):
or args.mz_range
or args.require_smiles
or args.require_inchi
or args.derive_precursor_mz_from_parent_mass
or args.reduce_to_top_n_peaks):
raise ValueError('No filter selected.')

Expand Down Expand Up @@ -84,6 +89,11 @@ def main(argv):
if args.reduce_to_top_n_peaks:
spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)

if args.derive_precursor_mz_from_parent_mass:
spectrum.set("parent_mass", float(spectrum.get('parent_mass')))
precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct)
spectrum.set("precursor_mz", precursor_mz)

if args.require_smiles and spectrum is not None:
spectrum = require_key(spectrum, "smiles")

Expand Down
48 changes: 48 additions & 0 deletions tools/matchms/test-data/filtering/derive_precursor_mz.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C20H12
INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Perylene
RETENTION_TIME: None
RETENTION_INDEX: 2886.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 251.08595400000002
NUM PEAKS: 3
250.07765 0.3282529462971431
252.09323 1.0
253.09656 0.20573802940517583

SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C14H10
INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Phenanthrene
RETENTION_TIME: None
RETENTION_INDEX: 1832.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 177.070224
NUM PEAKS: 5
152.0619 0.1657993569424221
176.062 0.24558560966311757
177.06982 0.12764433529926775
178.0775 1.0
179.08078 0.16394988149600653

50 changes: 50 additions & 0 deletions tools/matchms/test-data/filtering/derive_precursor_mz_out.msp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C20H12
INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Perylene
RETENTION_TIME: None
RETENTION_INDEX: 2886.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 251.08595400000002
PRECURSOR_MZ: 251.08540542009078
NUM PEAKS: 3
250.07765 0.3282529462971431
252.09323 1.0
253.09656 0.20573802940517583

SCANNUMBER: -1
IONMODE: positive
SPECTRUMTYPE: Centroid
FORMULA: C14H10
INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
IONIZATION: EI+
LICENSE: CC BY-NC
COMPOUND_NAME: Phenanthrene
RETENTION_TIME: None
RETENTION_INDEX: 1832.9
ADDUCT: [M]+
COLLISION_ENERGY: 70eV
INSTRUMENT_TYPE: GC-EI-Orbitrap
CHARGE: 1
PARENT_MASS: 177.070224
PRECURSOR_MZ: 177.06967542009076
NUM PEAKS: 5
152.0619 0.1657993569424221
176.062 0.24558560966311757
177.06982 0.12764433529926775
178.0775 1.0
179.08078 0.16394988149600653

0 comments on commit 98223db

Please sign in to comment.