Skip to content

Commit 2ee4ccb

Browse files
committed
Add a parameter to override the peak-density test and include mzML/mzXML spectra that are reported as centroided
Existing code always tests the spectrum peak data for a minimum median peak distance of 50 ppm (to be considered centroid data), which always overrides the value read from an mzML or mzXML file. The parameter overrides the test result in the case that the mzML/mzXML file reports the spectrum as centroided, but the peak data failed the test.
1 parent 2712ab0 commit 2ee4ccb

File tree

9 files changed

+95
-14
lines changed

9 files changed

+95
-14
lines changed

docs/Changelog.html

+7
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ <h1 class="pagetitle">MS-GF+ ChangeLog</h1>
1313
<a href="index.html">MS-GF+ Documentation home</a>
1414
</p>
1515

16+
<p>
17+
<b>v2023.01.12</b>
18+
</p>
19+
<ul>
20+
<li>Add parameter and output messages for working with particularly dense centroided data (read from mzML or mzXML)</li>
21+
</ul>
22+
1623
<p>
1724
<b>v2022.04.18</b>
1825
</p>

docs/MSGFPlus.html

+4
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ <h1>MS-GF+</h1>
9090
<span class="code-keyword">[-maxMissedCleavages Count]</span> (Exclude peptides with more than this number of missed cleavages from the search; <span class="code-object">Default: -1 (no limit)</span>)
9191

9292
<span class="code-keyword">[-numMods Count]</span> (Maximum number of dynamic (variable) modifications per peptide; <span class="code-object">Default: 3</span>)
93+
94+
<span class="code-keyword">[-allowDenseCentroidedPeaks 0/1]</span> (<span class="code-object">Default: 0 (disabled)</span>; 1: (for mzML/mzXML input only) allows inclusion of spectra with high-density centroid data in the search)
95+
MS-GF+ checks the distance between consecutive peaks in each spectrum, and if the median distance is less than 50 ppm, the spectrum is treated as a profile spectrum regardless of the value provided in the mzML or mzXML file.
96+
This parameter allows overriding this check when the mzML/mzXML file says the spectrum is centroided.
9397
</pre>
9498
</div>
9599

src/main/java/edu/ucsd/msjava/msdbsearch/SearchParams.java

+7
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ public class SearchParams {
5050
private double chargeCarrierMass;
5151
private int maxMissedCleavages;
5252
private int maxNumMods;
53+
private boolean allowDenseCentroidedPeaks;
5354

5455
public SearchParams() {
5556
}
@@ -214,6 +215,10 @@ public int getMaxMissedCleavages() {
214215
return maxMissedCleavages;
215216
}
216217

218+
// Used by MS-GF+
219+
public boolean getAllowDenseCentroidedPeaks() {
220+
return allowDenseCentroidedPeaks;
221+
}
217222

218223
/**
219224
* Look for # in dataLine
@@ -402,6 +407,8 @@ public String parse(ParamManager paramManager) {
402407
} else if (maxMissedCleavages > -1 && enzyme.getName().equals("NoCleavage")) {
403408
return "Cannot specify a MaxMissedCleavages when using no cleavage enzyme";
404409
}
410+
411+
allowDenseCentroidedPeaks = paramManager.getAllowDenseCentroidedPeaks() == 1;
405412

406413
maxNumMods = paramManager.getMaxNumModsPerPeptide();
407414
int maxNumModsCompare = aaSet.getMaxNumberOfVariableModificationsPerPeptide();

src/main/java/edu/ucsd/msjava/msutil/SpecKey.java

+22-6
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ public static ArrayList<SpecKey> getSpecKeyList(
6767
int minCharge,
6868
int maxCharge,
6969
ActivationMethod activationMethod,
70-
int minNumPeaksPerSpectrum) {
70+
int minNumPeaksPerSpectrum,
71+
boolean allowDenseCentroidedData) {
7172

7273
Iterator<Spectrum> itr = specAcc.getSpecItr();
7374

@@ -78,7 +79,8 @@ public static ArrayList<SpecKey> getSpecKeyList(
7879
minCharge,
7980
maxCharge,
8081
activationMethod,
81-
minNumPeaksPerSpectrum);
82+
minNumPeaksPerSpectrum,
83+
allowDenseCentroidedData);
8284

8385

8486
SpectrumParser parser = specAcc.getSpectrumParser();
@@ -101,14 +103,16 @@ public static ArrayList<SpecKey> getSpecKeyList(
101103
int minCharge,
102104
int maxCharge,
103105
ActivationMethod activationMethod,
104-
int minNumPeaksPerSpectrum) {
106+
int minNumPeaksPerSpectrum,
107+
boolean allowDenseCentroidedData) {
105108

106109
if (activationMethod == ActivationMethod.FUSION)
107110
return getFusedSpecKeyList(itr, startSpecIndex, endSpecIndex, minCharge, maxCharge);
108111

109112
ArrayList<SpecKey> specKeyList = new ArrayList<SpecKey>();
110113

111114
int numProfileSpectra = 0;
115+
int numDenseCentroidedSpectra = 0;
112116
int numSpectraWithTooFewPeaks = 0;
113117
final int MAX_INFORMATIVE_MESSAGES = 10;
114118
int informativeMessageCount = 0;
@@ -176,17 +180,25 @@ public static ArrayList<SpecKey> getSpecKeyList(
176180
}
177181
}
178182

179-
if (!spec.isCentroided()) {
183+
if (!spec.isCentroided() && !(spec.isCentroidedWithDensePeaks() && allowDenseCentroidedData)) {
184+
String message = "Skip spectrum " + spec.getID() + " since ";
185+
if (spec.isCentroidedWithDensePeaks()) {
186+
message += "peaks are too dense";
187+
numDenseCentroidedSpectra++;
188+
} else {
189+
message += "it is not centroided";
190+
numProfileSpectra++;
191+
}
192+
180193
if (informativeMessageCount < MAX_INFORMATIVE_MESSAGES) {
181-
System.out.println("Skip spectrum " + spec.getID() + " since it is not centroided");
194+
System.out.println(message);
182195
informativeMessageCount++;
183196
} else {
184197
if (informativeMessageCount == MAX_INFORMATIVE_MESSAGES) {
185198
System.out.println(" ...");
186199
informativeMessageCount++;
187200
}
188201
}
189-
numProfileSpectra++;
190202
continue;
191203
}
192204

@@ -206,6 +218,10 @@ public static ArrayList<SpecKey> getSpecKeyList(
206218

207219
System.out.println("Ignoring " + numProfileSpectra + " profile spectra.");
208220
System.out.println("Ignoring " + numSpectraWithTooFewPeaks + " spectra having less than " + minNumPeaksPerSpectrum + " peaks.");
221+
if (numDenseCentroidedSpectra > 0) {
222+
System.out.println("Ignoring " + numDenseCentroidedSpectra + " spectra marked as centroid with dense peaks (<50ppm median distance).\n" +
223+
" Re-run search with parameter '-allowDenseCentroidedPeaks 1' to include these spectra in the search");
224+
}
209225

210226
return specKeyList;
211227
}

src/main/java/edu/ucsd/msjava/msutil/Spectrum.java

+29-3
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ public enum Polarity {
4444
private Polarity scanPolarity = Polarity.POSITIVE;
4545

4646
private Boolean isCentroided = true;
47+
private Boolean externalSetIsCentroided = false;
48+
private Boolean isCentroidedWithDensePeaks = false;
4749

4850
private boolean isHighPrecision = false;
4951
// private Tolerance precursorTolerance = null;
@@ -256,6 +258,15 @@ public boolean isCentroided() {
256258
return this.isCentroided;
257259
}
258260

261+
/**
262+
* Whether this spectrum is centroided according to the reader, but failed determineIsCentroided() because peaks are too dense.
263+
*
264+
* @return false unless the reader called setIsCentroided(true) and determineIsCentroided() failed
265+
*/
266+
public boolean isCentroidedWithDensePeaks() {
267+
return this.isCentroidedWithDensePeaks;
268+
}
269+
259270
/**
260271
* Returns whether this spectrum peaks are measured with high-precision.
261272
*
@@ -437,6 +448,8 @@ public void setScanPolarity(Polarity scanPolarity) {
437448
*/
438449
public void setIsCentroided(boolean isCentroided) {
439450
this.isCentroided = isCentroided;
451+
// function is used for mzML and mzXML files, track that isCentroided was set outside of this class
452+
this.externalSetIsCentroided = true;
440453
}
441454

442455
/**
@@ -489,7 +502,7 @@ public Float getIsolationWindowTargetMz() {
489502
* Sets isCentroided using a simple test.
490503
*/
491504
public void determineIsCentroided() {
492-
this.isCentroided = true;
505+
boolean centroidedCheckPass = true;
493506

494507
// if(this.size() > 100)
495508
// {
@@ -516,8 +529,21 @@ public void determineIsCentroided() {
516529
prevMz = curMz;
517530
}
518531
Collections.sort(diff);
519-
if (diff.size() > 0 && diff.get(diff.size() / 2) < 50)
520-
isCentroided = false;
532+
if (diff.size() > 0 && diff.get(diff.size() / 2) < 50) {
533+
// Check failed - the median PPM distance between peaks is less than 50 PPM
534+
centroidedCheckPass = false;
535+
}
536+
}
537+
538+
if (centroidedCheckPass) {
539+
this.isCentroided = true;
540+
} else {
541+
if (this.isCentroided && this.externalSetIsCentroided) {
542+
// set a flag to notify the user
543+
this.isCentroidedWithDensePeaks = true;
544+
}
545+
546+
this.isCentroided = false;
521547
}
522548
}
523549

src/main/java/edu/ucsd/msjava/params/ParamManager.java

+19
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,9 @@ public enum ParamNameEnum {
131131
ADD_FEATURES("addFeatures", "AddFeatures", "Include additional features in the output (enable this to post-process results with Percolator)",
132132
"0 means Output basic scores only (Default)\n" +
133133
"\t 1 means Output additional features"),
134+
135+
ALLOW_DENSE_CENTROIDED_PEAKS("allowDenseCentroidedPeaks", "AllowDenseCentroidedPeaks", "Allow centroid scans with dense peaks (Default: 0)\n" +
136+
"\t (for mzML or mzXML files, the console output will tell you if you might want to use this)", null),
134137

135138
DD_DIRECTORY("dd", "DBIndexDir", "Path to the directory containing database index files", null),
136139

@@ -652,6 +655,13 @@ private void addMaxNumModsParam() {
652655
addParameter(maxNumMods);
653656
}
654657

658+
private void addAllowDenseCentroidedPeaksParam() {
659+
EnumParameter allowDenseCentroidedPeaksParam = new EnumParameter(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS);
660+
allowDenseCentroidedPeaksParam.registerEntry("Skip all spectra that fail a peak density check").setDefault();
661+
allowDenseCentroidedPeaksParam.registerEntry("Allow mzML/mzXML centroided spectra that fail a peak density check");
662+
addParameter(allowDenseCentroidedPeaksParam);
663+
}
664+
655665
private void addDbIndexDirParam(boolean isHidden) {
656666
FileParameter dbIndexDirParam = new FileParameter(ParamNameEnum.DD_DIRECTORY);
657667
dbIndexDirParam.fileMustExist();
@@ -780,6 +790,8 @@ public void addMSGFPlusParams() {
780790
addChargeCarrierMassParam();
781791
addMaxMissedCleavagesParam();
782792
addMaxNumModsParam();
793+
794+
addAllowDenseCentroidedPeaksParam();
783795

784796
addExample("Example (high-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 1 -t 20ppm -ti -1,2 -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
785797
addExample("Example (low-precision): java -Xmx3500M -jar MSGFPlus.jar -s test.mzML -d IPI_human_3.79.fasta -inst 0 -t 0.5Da,2.5Da -ntt 2 -tda 1 -o testMSGFPlus.mzid -mod Mods.txt");
@@ -907,6 +919,8 @@ public void addMSGFDBParams() {
907919
uniformAAProb.registerEntry("Use amino acid probabilities computed from the input database").setDefault();
908920
uniformAAProb.registerEntry("Use probability 0.05 for all amino acids");
909921
addParameter(uniformAAProb);
922+
923+
addAllowDenseCentroidedPeaksParam();
910924

911925
addExample("Example (high-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 30ppm -c13 1 -nnet 0 -tda 1 -o testMSGFDB.tsv");
912926
addExample("Example (low-precision): java -Xmx2000M -jar MSGFDB.jar -s test.mzXML -d IPI_human_3.79.fasta -t 0.5Da,2.5Da -nnet 0 -tda 1 -o testMSGFDB.tsv");
@@ -1175,6 +1189,11 @@ public FileParameter getConfigFileParam() {
11751189
return ((FileParameter) getParameter(ParamNameEnum.CONFIGURATION_FILE.key));
11761190
}
11771191

1192+
// Used by MS-GF+
1193+
public int getAllowDenseCentroidedPeaks() {
1194+
return getIntValue(ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.key);
1195+
}
1196+
11781197
public int getIntValue(String key) {
11791198
Parameter param = this.getParameter(key);
11801199
if (param instanceof IntParameter)

src/main/java/edu/ucsd/msjava/ui/MSGFDB.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
208208
boolean useUniformAAProb = paramManager.getIntValue(ParamManager.ParamNameEnum.UNIFORM_AA_PROBABILITY.getKey()) == 1;
209209
boolean replicateMergedResults = paramManager.getIntValue("replicate") == 1;
210210
boolean doNotDseEdgeScore = paramManager.getIntValue(ParamManager.ParamNameEnum.EDGE_SCORE.getKey()) == 1;
211+
boolean allowDenseCentroidedPeaks = paramManager.getIntValue(ParamManager.ParamNameEnum.ALLOW_DENSE_CENTROIDED_PEAKS.getKey()) == 1;
211212

212213
System.out.println("Loading database files...");
213214
File dbIndexDir = paramManager.getFile(ParamManager.ParamNameEnum.DD_DIRECTORY.getKey());
@@ -277,7 +278,7 @@ private static String runMSGFDB(File specFile, SpecFileFormat specFormat, File o
277278
int avgPeptideMass = 2000;
278279
int numBytesPerMass = 12;
279280
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
280-
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
281+
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, allowDenseCentroidedPeaks);
281282
int specSize = specKeyList.size();
282283

283284
System.out.print("Reading spectra finished ");

src/main/java/edu/ucsd/msjava/ui/MSGFDBLib.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public static String runMSGFLib(ParamManager paramManager) {
104104
int avgPeptideMass = 2000;
105105
int numBytesPerMass = 12;
106106
int numSpecScannedTogether = (int) ((float) maxMemory / avgPeptideMass / numBytesPerMass);
107-
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM);
107+
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc.getSpecItr(), 0, Integer.MAX_VALUE, 0, Integer.MAX_VALUE, activationMethod, Constants.MIN_NUM_PEAKS_PER_SPECTRUM, false);
108108
int specSize = specKeyList.size();
109109

110110
System.out.print("Reading spectra finished ");

src/main/java/edu/ucsd/msjava/ui/MSGFPlus.java

+4-3
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222

2323

2424
public class MSGFPlus {
25-
public static final String VERSION = "Release (v2022.04.18)";
26-
public static final String RELEASE_DATE = "18 April 2022";
25+
public static final String VERSION = "Release (v2023.01.12)";
26+
public static final String RELEASE_DATE = "12 January 2023";
2727

2828
public static final String DECOY_DB_EXTENSION = ".revCat.fasta";
2929
public static final String DEFAULT_DECOY_PROTEIN_PREFIX = "XXX";
@@ -185,6 +185,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
185185

186186
int numThreads = params.getNumThreads();
187187
boolean doNotUseEdgeScore = params.doNotUseEdgeScore();
188+
boolean allowDenseCentroidedPeaks = params.getAllowDenseCentroidedPeaks();
188189

189190
int minNumPeaksPerSpectrum = params.getMinNumPeaksPerSpectrum();
190191
if (minNumPeaksPerSpectrum == -1) // not specified
@@ -267,7 +268,7 @@ private static String runMSGFPlus(int ioIndex, SpecFileFormat specFormat, File o
267268
return "Error while parsing spectrum file: " + specFile.getPath();
268269

269270
ArrayList<SpecKey> specKeyList = SpecKey.getSpecKeyList(specAcc,
270-
startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum);
271+
startSpecIndex, endSpecIndex, minCharge, maxCharge, activationMethod, minNumPeaksPerSpectrum, allowDenseCentroidedPeaks);
271272

272273
int specSize = specKeyList.size();
273274
if (specSize == 0)

0 commit comments

Comments
 (0)