From 62013c0ce0e571cc8ce20c2e8b3b9174773179da Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Tue, 28 May 2024 10:43:40 -0500 Subject: [PATCH 01/13] The lie's comment about the glyco-searching --- .../EngineLayer/GlycoSearch/AdjNode.cs | 1 + .../EngineLayer/GlycoSearch/Glycan.cs | 62 +++++++++++-------- .../EngineLayer/GlycoSearch/GlycanBox.cs | 4 +- .../EngineLayer/GlycoSearch/GlycanDatabase.cs | 23 ++++--- .../EngineLayer/GlycoSearch/GlycoPeptides.cs | 4 +- .../GlycoSearch/GlycoSearchEngine.cs | 39 +++++++----- .../GlycoSearch/GlycoSpectralMatch.cs | 3 +- .../EngineLayer/GlycoSearch/ModBox.cs | 2 +- MetaMorpheus/EngineLayer/GlycoSearch/Node.cs | 2 +- 9 files changed, 80 insertions(+), 60 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs index 31843be86..d7d20b5db 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs @@ -4,6 +4,7 @@ namespace EngineLayer.GlycoSearch { + //the class is for localization graph matrix. Each node in the matrix is represented by AdjNode. public class AdjNode { //AdjNode -> Adjactent node is used to build graph matrix for localizaiton. Each node in graph matrix contain Sources, max cost, current cost, etc. 
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs index 0ce4f1361..27c03a9c0 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs @@ -16,7 +16,7 @@ public GlycanIon(string ionStruct, int ionMass, byte[] ionKind, int lossIonMass) IonStruct = ionStruct; IonMass = ionMass; IonKind = ionKind; - LossIonMass = lossIonMass; + LossIonMass = lossIonMass; // neutral loss mass } public string IonStruct { get; set; } public int IonMass { get; set; } @@ -57,12 +57,12 @@ public string Composition public List Ions { get; set; } public bool Decoy { get; private set; } - public HashSet DiagnosticIons + public HashSet DiagnosticIons //B ions, and there are more ions to set... { get - { + { //kind[] is the sugar type composition of glycan, and each index represent the corresponding sugar type. HashSet diagnosticIons = new HashSet(); - if (Kind[0] >= 1) + if (Kind[0] >= 1) //if we have Hexose(the number more than one), then we have the corresponding diagonsitic ions as below. { diagnosticIons.Add(10902895 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(11503951 - hydrogenAtomMonoisotopicMass); @@ -134,10 +134,10 @@ public HashSet DiagnosticIons {"Xylose", new Tuple('X', 9) } }; - public readonly static HashSet CommonOxoniumIons = new HashSet - {13805550, 16806607, 18607663, 20408720, 36614002 }; + public readonly static HashSet CommonOxoniumIons = new HashSet //The same ion as we describe above in the diagnostic ions. That just for the initial matching with the gkycan. + {13805550, 16806607, 18607663, 20408720, 36614002 };// some software use the ions to predict verified glycopeptide (pre-filter). - public readonly static int[] AllOxoniumIons = new int[] + public readonly static int[] AllOxoniumIons = new int[] //The same ion as we describe above in the diagnostic ions. We didn't use the ions for matching now. 
{10902895, 11503951, 12605550, 12703952, 13805550, 14406607, 16306064, 16806607, 18607663, 20408720, 27409268, 29008759, 29210324, 30809816, 36614002, 65723544, 67323035}; //TrimannosylCore is only useful for N-Glyco peptides. @@ -160,14 +160,17 @@ public HashSet DiagnosticIons #region Glycan Structure manipulation - //There are two ways to represent a glycan in string, one only combination, the other structure. - //The method generate a glycan by read in a glycan structure string from database. + //There are two ways to represent a glycan in string, one is only composition, and the other is included linkage and composition information. + // first one: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1), second one: (N(H(A))(N(H(A))(F))) + + //The method generate a glycan by reading the glycan structure string from database. + // input : (N(H(A))(N(H(A))(F))), output: Glycan object. public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglycan = false) { - Node node = Struct2Node(theGlycanStruct); - List nodeIons = GetAllChildrenCombination(node); - int mass = Glycan.GetMass(theGlycanStruct); - byte[] kind = Glycan.GetKind(theGlycanStruct); + Node node = Struct2Node(theGlycanStruct); //Transfer string to tree structure. + List nodeIons = GetAllChildrenCombination(node); //Get all possible fragmentation/neutral loss of a glycan. + int mass = Glycan.GetMass(theGlycanStruct); //Get glycan mass. + byte[] kind = Glycan.GetKind(theGlycanStruct); //Get glycan composition, which is a byte array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1]. List glycanIons = new List(); HashSet ionMasses = new HashSet(); foreach (var aNodeIon in nodeIons) @@ -193,25 +196,26 @@ public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglyca return glycan; } - //Glycan are represented in tree structures composed of Node. The function here is to transfer a string into connected Node. 
+ //The function here is to transfer a glycan-string into tree format. (Glycan are represented in tree structures composed of Node) + //input: (N(H)), output: Node(N, 0) -> left Child = Node(H, 1) public static Node Struct2Node(string theGlycanStruct) { int level = 0; - Node curr = new Node(theGlycanStruct[1], level); - for (int i = 2; i < theGlycanStruct.Length - 1; i++) + Node curr = new Node(theGlycanStruct[1], level);//The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root. + for (int i = 2; i < theGlycanStruct.Length - 1; i++) //try to extract the following characters. { - if (theGlycanStruct[i] == '(') + if (theGlycanStruct[i] == '(') //skip the '(' character. { continue; } - if (theGlycanStruct[i] == ')') + if (theGlycanStruct[i] == ')')//when we meet a ')', we need to go back to the father node. { curr = curr.Father; level--; } - else + else // when we meet a character, we need to decide where to put it in the tree. (putting priority: left -> right side -> middle) { - level++; + level++; //first, move to the next level.(Deeper level) if (curr.LeftChild == null) { curr.LeftChild = new Node(theGlycanStruct[i], level); @@ -233,7 +237,8 @@ public static Node Struct2Node(string theGlycanStruct) } } } - return curr; + return curr; // return the root of the tree. + } //The function is to generate all possible fragmentation/neutral loss of a glycan, which is a subset of glycan. @@ -364,6 +369,7 @@ private static List GetAllChildrenCombination(Node node) } //Node structure to string structure. + // input: Node(N, 0) -> left Child = Node(H, 1), output: (N(H)) private static string Node2Struct(Node node) { string output = ""; @@ -389,7 +395,7 @@ public static int GetIonLossMass(byte[] Kind, byte[] ionKind) #region Transfer information - private static int GetMass(string structure) + private static int GetMass(string structure) //Get glycan mass by glycan structure string. 
structure format : (N(H(A))(N(H(A))(F))) { int y = CharMassDic['H'] * structure.Count(p => p == 'H') + CharMassDic['N'] * structure.Count(p => p == 'N') + @@ -405,7 +411,7 @@ private static int GetMass(string structure) return y; } - public static int GetMass(byte[] kind) + public static int GetMass(byte[] kind) //Get glycan mass by glycan composition. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] { int mass = CharMassDic['H'] * kind[0] + CharMassDic['N'] * kind[1] + @@ -422,7 +428,7 @@ public static int GetMass(byte[] kind) return mass; } - public static byte[] GetKind(string structure) + public static byte[] GetKind(string structure) //Get glycan composition by the structure string. structure format : (N(H(A))(N(H(A))(F))), output : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] { var kind = new byte[] { Convert.ToByte(structure.Count(p => p == 'H')), @@ -439,7 +445,7 @@ public static byte[] GetKind(string structure) return kind; } - public static string GetKindString(byte[] Kind) + public static string GetKindString(byte[] Kind)//Get glycan composition by the kind[]. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0], output is H2N2A2F1. { string H = Kind[0]==0 ? "" : "H" + Kind[0].ToString(); string N = Kind[1] == 0 ? "" : "N" + Kind[1].ToString(); @@ -488,7 +494,7 @@ public static Modification NGlycanToModification(Glycan glycan) return modification; } - public static Modification OGlycanToModification(Glycan glycan) + public static Modification OGlycanToModification(Glycan glycan) //try to transfer the glycan object to modification object. { //TO THINK: what the neutralLoss for O-Glyco? Dictionary> neutralLosses = new Dictionary>(); @@ -528,6 +534,8 @@ public static IEnumerable> GetKCombs(IEnumerable list, int return GetKCombs(list, length - 1).SelectMany(t => list.Where(o => o.CompareTo(t.Last()) > 0), (t1, t2) => t1.Concat(new T[] { t2 })); } + // Try to create the combination with the list, and repeptitation is allowed. 
+ // List is the base list, the length is the length for combination public static IEnumerable> GetKCombsWithRept(IEnumerable list, int length) where T : IComparable { if (length == 1) return list.Select(t => new T[] { t }); @@ -573,7 +581,7 @@ public static bool Equals(Glycan glycan1, Glycan glycan2) return false; } - public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans) + public static Glycan[] BuildTargetDecoyGlycans(IEnumerable glycans) //Build target-decoy glycans for testing. { List allGlycans = new List(); diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs index 9a1d0f5d2..52f44a785 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs @@ -12,6 +12,8 @@ namespace EngineLayer //One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization. //GlycanBox -- A defined combination of glycans will be considered to modify on one peptide. //The GlycanBoxMass is the total mass of all glycans on the peptide + //For example, if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3]) + //By the way, the GlycanBox will be first step in the search, the parameter (Max glycan num in peptide) will be used to limit the capacity of the list. public class GlycanBox:ModBox { public static Glycan[] GlobalOGlycans { get; set; } @@ -22,7 +24,7 @@ public class GlycanBox:ModBox //TO DO: Decoy O-glycan can be created, but the results need to be reasoned. //public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 }; - private readonly static int[] SugarShift = new int[] + private readonly static int[] SugarShift = new int[] //still unclear about the shift... 
{ 7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050, 11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330, diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs index ddc64d7f6..27c8cd5a0 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs @@ -6,10 +6,10 @@ namespace EngineLayer { - - public static class GlycanDatabase + // in our database, the N-glycan.gdb should be correct to the new format + public static class GlycanDatabase { - //Load Glycan. Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions. + //Load Glycan from the database file (located in the Glycan_Mod). Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions. public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch) { bool isKind = true; @@ -18,7 +18,7 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon while(lines.Peek() != -1) { string line = lines.ReadLine(); - if (!line.Contains("HexNAc")) + if (!line.Contains("HexNAc")) //use the first line to determine the type of glycan database. 
{ isKind = false; } @@ -28,11 +28,11 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon if (isKind) { - return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); + return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); // open the file of the kind format, example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1) } else { - return LoadStructureGlycan(filePath, IsOGlycanSearch); + return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A)) } } @@ -51,9 +51,9 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat continue; } - var kind = String2Kind(line); + var kind = String2Kind(line); // convert the database string to kind[] format (byte array). - var glycan = new Glycan(kind); + var glycan = new Glycan(kind); // use the kind[] to create a glycan object. glycan.GlyId = id++; if (ToGenerateIons) { @@ -71,7 +71,9 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat } } - public static byte[] String2Kind(string line) + //Convert the string to byte array. + //Input example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1), Output example: [2, 5, 0, 0, 1, 0, 0, 0, 0, 1] + public static byte[] String2Kind(string line) { byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; var x = line.Split(new char[] { '(', ')' }); @@ -94,7 +96,7 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG while (glycans.Peek() != -1) { string line = glycans.ReadLine(); - yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); + yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); // Directly convert the string to Glycan object. } } } @@ -102,6 +104,7 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG //This function build fragments based on the general core of NGlyco fragments. 
//From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py#L408 //The fragment generation is not as good as structure based method. So it is better to use a structure based N-Glycan database. + // The function is used to load the database from the different formats, but we don't use it now. public static List NGlycanCompositionFragments(byte[] kind) { int glycan_mass = Glycan.GetMass(kind); diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs index b7557c83e..0373cd838 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs @@ -10,8 +10,8 @@ namespace EngineLayer.GlycoSearch { - public static class GlycoPeptides - { + public static class GlycoPeptides + { // NOTE(review): not fully understood yet; presumably this function generates a list of the intensities of the oxonium ions found in the scan (TODO confirm). public static double[] ScanOxoniumIonFilter(Ms2ScanWithSpecificMass theScan, MassDiffAcceptor massDiffAcceptor) { double[] oxoniumIonsintensities = new double[Glycan.AllOxoniumIons.Length]; diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs index b1e8bccf4..32b48dda0 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs @@ -30,6 +30,7 @@ public class GlycoSearchEngine : ModernSearchEngine private readonly List[] SecondFragmentIndex; + // The constructor for GlycoSearchEngine, we can load the parameters for the searching like mode, topN, maxOGlycanNum, oxoniumIonFilter, database, etc.
public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpecificMass[] listOfSortedms2Scans, List peptideIndex, List[] fragmentIndex, List[] secondFragmentIndex, int currentPartition, CommonParameters commonParameters, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string oglycanDatabase, string nglycanDatabase, GlycoSearchType glycoSearchType, int glycoSearchTopNum, int maxOGlycanNum, bool oxoniumIonFilter, List nestedIds) @@ -48,19 +49,19 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci ProductSearchMode = new SinglePpmAroundZeroSearchMode(20); //For Oxonium ion only - if (glycoSearchType == GlycoSearchType.OGlycanSearch) + if (glycoSearchType == GlycoSearchType.OGlycanSearch) //if we do the O-glycan search, we need to load the O-glycan database and generate the glycoBox. { GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray(); GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans); - GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray(); + GlycanBox.OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(_maxOGlycanNum, false).OrderBy(p => p.Mass).ToArray(); //generate glycan box for O-glycan search } - else if (glycoSearchType == GlycoSearchType.NGlycanSearch) + else if (glycoSearchType == GlycoSearchType.NGlycanSearch) //because the there is only one glycan in N-glycanpeptide, so we don't need to build the n-glycanBox here. { NGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.NGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _nglycanDatabase).First(), true, false).OrderBy(p => p.Mass).ToArray(); //TO THINK: Glycan Decoy database. 
//DecoyGlycans = Glycan.BuildTargetDecoyGlycans(NGlycans); } - else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch) + else if (glycoSearchType == GlycoSearchType.N_O_GlycanSearch) //searching both N-glycans and O-glycans is not yet completely built or tested. { GlycanBox.GlobalOGlycans = GlycanDatabase.LoadGlycan(GlobalVariables.OGlycanLocations.Where(p => System.IO.Path.GetFileName(p) == _oglycanDatabase).First(), true, true).ToArray(); GlycanBox.GlobalOGlycanModifications = GlycanBox.BuildGlobalOGlycanModifications(GlycanBox.GlobalOGlycans); @@ -85,7 +86,7 @@ protected override MetaMorpheusEngineResults RunSpecific() byte byteScoreCutoff = (byte)CommonParameters.ScoreCutoff; int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; - int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); + int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); // We can do the parallel search on different threads Parallel.ForEach(threads, (scanIndex) => { byte[] scoringTable = new byte[PeptideIndex.Count]; @@ -272,7 +273,7 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex) } } - //For FindOGlycan + //For FindOGlycan, generate the gsms for O-glycan search private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIndex, int rank, PeptideWithSetModifications peptide, Route localization, double[] oxoniumIonIntensities, List localizationGraphs) { var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(localization, peptide); @@ -348,7 +349,7 @@ private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIn } else { - psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5]; + psmGlyco.R138vs144 = oxoniumIonIntensities[4] / oxoniumIonIntensities[5]; // if the ratio is high, the glycan is more likely to be an N-glycan. Conversely, a small ratio means it is closer to an O-glycan.
} return psmGlyco; @@ -370,16 +371,17 @@ private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scor } } + // private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, double possibleGlycanMassLow, double[] oxoniumIonIntensities, ref List possibleMatches) { - int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); + int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); // try to find the index of the closest match to "possibleGlycanMassLow" within the glycanBox - int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray(); + int[] modPos = GlycoSpectralMatch.GetPossibleModSites(theScanBestPeptide, new string[] { "S", "T" }).OrderBy(p => p).ToArray(); //list all of the possible glycosylation sites/positions var localizationScan = theScan; - List products = new List(); + List products = new List(); // product list for the theoretical fragment ions - //For HCD-pd-ETD or CD-pd-EThcD type of data + //For HCD-pd-ETD or CD-pd-EThcD type of data, we generate the different products. if (theScan.ChildScans.Count > 0 && GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons)) { localizationScan = theScan.ChildScans.First(); @@ -396,14 +398,14 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco //No localization can be done with MS2-HCD spectrum //TO THINK: there is a special situation. The HCD only scan from HCD-pd-EThcD data can be a glycopeptide, but there is no ETD, so there is no localization. What to do with this?
bool is_HCD_only_data = !GlycoPeptides.DissociationTypeContainETD(CommonParameters.DissociationType, CommonParameters.CustomIons) && !GlycoPeptides.DissociationTypeContainETD(CommonParameters.MS2ChildScanDissociationType, CommonParameters.CustomIons); - if (is_HCD_only_data) + if (is_HCD_only_data) // In the HCD, there is no Y ion, so we don't need to consider the modification here. { theScanBestPeptide.Fragment(DissociationType.HCD, FragmentationTerminus.Both, products); } double bestLocalizedScore = 0; - List localizationGraphs = new List(); + List localizationGraphs = new List(); // if we also have ETD, then we will search the localization while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) { @@ -519,7 +521,7 @@ private void FindNGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco } } - + // Conduct the search and generate the gsms for N-glycan search private List FindNGlycopeptide(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff) { List possibleMatches = new List(); @@ -566,6 +568,9 @@ private List FindNGlycopeptide(Ms2ScanWithSpecificMass theSc } return possibleMatches; } + + // This function conduct the search and generate the glyco search match spectrum (gsms). + // The search is the modern search to check each possible peptide candidate. 
private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff) { List possibleMatches = new List(); @@ -574,11 +579,11 @@ private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificM { var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; - if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) + if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) // If the peptide mass is identical to the precursor mass (or within the tolerance), we can directly search the glycopeptide. { FindSingle(theScan, scanIndex, scoreCutOff, theScanBestPeptide, ind, ref possibleMatches); } - else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //Filter out unknow non-glycan modifications. + else if (theScan.PrecursorMass - theScanBestPeptide.MonoisotopicMass >= 100) //If not, we need to consider the glycan mass difference. { //Filter by glycanBoxes mass difference. var possibleGlycanMassLow = PrecusorSearchMode.GetMinimumValue(theScan.PrecursorMass) - theScanBestPeptide.MonoisotopicMass; @@ -587,7 +592,7 @@ private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificM if (possibleGlycanMassHigh < GlycanBox.OGlycanBoxes.First().Mass || possibleGlycanMassLow > GlycanBox.OGlycanBoxes.Last().Mass) { - continue; + continue; // if the glycan mass difference is out of the range of the glycan box, we can skip this peptide.
} //Filter by OxoniumIon diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs index acc6db3be..f35e23131 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs @@ -113,7 +113,7 @@ public static bool MotifExist(string baseSeq, string[] motifs) return false; } - public static string GetTabSepHeaderSingle() + public static string GetTabSepHeaderSingle() //Most complicated part of this class: builds the tab-separated column header used when writing the results out { var sb = new StringBuilder(); sb.Append("File Name" + '\t'); @@ -358,6 +358,7 @@ public static Dictionary MatchedIonDataDictionary(List ; Input: List + // example: {18, 1, True}, means the 18th glycan is localized on the 1st position of the peptide. public static List> GetLocalizedGlycan(List OGlycanBoxLocalization, out LocalizationLevel localizationLevel) { List> localizedGlycan = new List>(); diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs index 7113366bb..84f3c61df 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/ModBox.cs @@ -1,6 +1,6 @@ namespace EngineLayer { - public class ModBox + public class ModBox //The superclass of GlycanBox { //One peptide can have several modifications. The combined modifications are grouped as a modification box. Used for localization. //ModBox -- a defined combination of modifications will be considered to modify on one peptide. The box means the combined group of modification.
diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs index 92f915a3b..64000438f 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs @@ -2,7 +2,7 @@ namespace EngineLayer { - public class Node + public class Node //The structure of the glycan { public Node(char v, Node l, Node r, Node m) { From 2e99da21b381bf171266b4657312f8c109a43233 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Tue, 4 Jun 2024 14:01:19 -0500 Subject: [PATCH 02/13] Try to add the search summary information (PSMs, protein group, glycoPsms, Level1-glycoPsms) into the "AllResult.txt" file for glycoSearch (1) adding text function in PostGlycoSearchAnalysisTask class (2) adding tester in TestOGlyco class (make sure we parse the certain value) (3) revise the "readCsv", enable to read the allPSMs file smoothly. --- MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs | 34 ++++++---- .../PostGlycoSearchAnalysisTask.cs | 31 +++++++-- MetaMorpheus/Test/TestOGlyco.cs | 64 +++++++++++++++++++ 3 files changed, 113 insertions(+), 16 deletions(-) diff --git a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs index 75f136ba9..1013681e7 100644 --- a/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs +++ b/MetaMorpheus/EngineLayer/PsmTsv/PsmFromTsv.cs @@ -207,19 +207,31 @@ public PsmFromTsv(string line, char[] split, Dictionary parsedHeade BetaPeptideChildScanMatchedIons.Remove(Ms2ScanNumber); } - //For Glyco - GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture); - GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]]; - GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? 
null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]]; - var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]]; - if (localizationLevel != null) + //For Glyco + try // Try is so that glyco and non-glyco psms can be read from the same file { - if (localizationLevel.Equals("NA")) - GlycanLocalizationLevel = null; - else - GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + GlycanMass = (parsedHeader[PsmTsvHeader_Glyco.GlycanMass] < 0) ? null : (double?)double.Parse(spl[parsedHeader[PsmTsvHeader_Glyco.GlycanMass]], CultureInfo.InvariantCulture); + GlycanComposition = (parsedHeader[PsmTsvHeader_Glyco.GlycanComposition] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanComposition]]; + GlycanStructure = (parsedHeader[PsmTsvHeader_Glyco.GlycanStructure] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanStructure]]; + var localizationLevel = (parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.GlycanLocalizationLevel]]; + if (localizationLevel != null) + { + if (localizationLevel.Equals("NA")) + GlycanLocalizationLevel = null; + else + GlycanLocalizationLevel = (LocalizationLevel)Enum.Parse(typeof(LocalizationLevel), localizationLevel); + } + LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]]; + + } + catch + { + GlycanMass = null; + GlycanComposition = null; + GlycanStructure = null; + GlycanLocalizationLevel = null; + LocalizedGlycan = null; } - LocalizedGlycan = (parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan] < 0) ? 
null : spl[parsedHeader[PsmTsvHeader_Glyco.LocalizedGlycan]]; } /// diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 6578a1790..0ff7ebc57 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -96,7 +96,13 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var protein_oglyco_localization_file = Path.Combine(OutputFolder + "\\protein_oglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_oglyco_localization_file); - WriteGlycoFile.WritePsmGlycoToTsv(allPsmsOgly, writtenFileOGlyco, true); //we write this last so localization can be attempted + // Writing the oglyco results to a file and summary text + WriteGlycoFile.WritePsmGlycoToTsv(allPsmsOgly, writtenFileOGlyco, true); //we write this last so localization can be attempted + MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsOgly. 
+ Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsOgly + .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + } break; case GlycoSearchType.NGlycanSearch: @@ -114,6 +120,10 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var protein_nglyco_localization_file = Path.Combine(OutputFolder + "\\protein_nglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_nglyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsNgly, writtenFileNGlyco, true); //we write this last so localization can be attempted + MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsNgly. //we write the search summary into the Allresult file + Count(p => p.FdrInfo.QValue < 0.01 && !p.IsContaminant && !p.IsDecoy)); + MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsNgly + .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); } break; case GlycoSearchType.N_O_GlycanSearch: @@ -132,13 +142,17 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var protein_no_glyco_localization_file = Path.Combine(OutputFolder + "\\protein_no_glyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_no_glyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsgly, writtenFileNOGlyco, true); //we write this last so localization can be attempted + MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsgly. 
+ Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsgly + .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); } break; } if (glycoSearchParameters.DoParsimony) { - GlycoProteinAnalysis(filteredGsms, OutputFolder);//Do the whole group last so inference is done on the whole group + GlycoProteinAnalysis(filteredGsms, OutputFolder, null, MyTaskResults);//Do the whole group last so inference is done on the whole group } else { @@ -150,6 +164,9 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var writtenFileSingle = Path.Combine(OutputFolder, "AllPSMs.psmtsv"); WriteGlycoFile.WritePsmGlycoToTsv(filteredGsms, writtenFileSingle, true); + MyTaskResults.AddTaskSummaryText("All target PSMs within 1% FDR: " + filteredGsms. + Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + if (Parameters.GlycoSearchParameters.WriteSpectrumLibrary) { @@ -231,7 +248,7 @@ private void SingleFDRAnalysis(List items, CommonParameters new FdrAnalysisEngine(psms, 0, commonParameters, this.FileSpecificParameters, taskIds).Run(); } - private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null) + private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null) { // convert gsms to psms List psmsForProteinParsimony = gsms.Select(p => p as SpectralMatch).ToList(); @@ -248,7 +265,7 @@ private void GlycoProteinAnalysis(List gsms, string outputFo ProteinGroups = proteinScoringAndFdrResults.SortedAndScoredProteinGroups; Status("Done constructing protein groups!", Parameters.SearchTaskId); - WriteProteinResults(outputFolder, individualFileFolder); + WriteProteinResults(outputFolder, individualFileFolder, myTaskResults); } private void 
GlycoAccessionAnalysis(List gsms, string individualFileFolderPath, string individualFileFolder = null) { @@ -285,13 +302,17 @@ private void GlycoAccessionAnalysis(List gsms, string indivi } } } - private void WriteProteinResults(string outputFolder, string individualFileFolder = null) + private void WriteProteinResults(string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null) { double qValueCutoff_FORDEBUGONLY = 0.01; string fileName = "AllProteinGroups.tsv"; string writtenFile = Path.Combine(outputFolder, individualFileFolder + "_"+ fileName); WriteProteinGroupsToTsv(ProteinGroups, writtenFile, new List { Parameters.SearchTaskId }, qValueCutoff_FORDEBUGONLY); + if (myTaskResults is not null) + myTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups. + Count(p => p.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + } private void WriteProteinGroupsToTsv(List proteinGroups, string filePath, List nestedIds, double qValueCutoff) { diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index fdfbb6f32..4407c4a35 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -20,6 +20,11 @@ using Readers; using System.Text; using Omics.Modifications; +using ThermoFisher.CommonCore.BackgroundSubtraction; +using Easy.Common.Extensions; +using iText.IO.Font.Otf; +using static Nett.TomlObjectFactory; +using Omics.SpectrumMatch; namespace Test { @@ -479,6 +484,65 @@ public static void OGlycoTest_Run5() string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + 
string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs + var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01).ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", 
"_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) && qVaule < 0.01); + + //For GlycoPSMs + string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects + .Where(p => p.QValue <= 0.01).ToList(); // the filtering (Q<0.01) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + + Directory.Delete(outputFolder, true); } From a0d2377d0c4ffcadf37fb8fbd6f2ef1101a1803a Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Wed, 5 Jun 2024 14:39:24 -0500 Subject: [PATCH 03/13] add the contaminant tester --- MetaMorpheus/Test/TestOGlyco.cs | 87 ++++++++++++++++++++++++++++++++- 1 file changed, 86 insertions(+), 1 deletion(-) diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 4407c4a35..4df269f3f 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -482,7 +482,11 @@ public static void OGlycoTest_Run5() DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, 
@"GlycoTestData\P13987_contaminant.fasta"), true); string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); - new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile }, + new List { targetDbForTask, contaminDbForTask }, + outputFolder).Run(); // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files @@ -546,6 +550,87 @@ public static void OGlycoTest_Run5() Directory.Delete(outputFolder, true); } + [Test] + public static void OGlycoTest_Run5_WriteContaminants() + { + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder); + + var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); + glycoSearchTask._glycoSearchParameters.WriteContaminants = true; + + DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); + DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile }, + new List { targetDbForTask, contaminDbForTask }, + outputFolder).Run(); + + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + string 
resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs + var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C").ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", 
"_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C"); + + //For GlycoPSMs + string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C").ToList(); // the filtering (Q<0.01) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + + + Directory.Delete(outputFolder, true); + } + [Test] public static void OGlycoTest_Run6() { From 9ff4c2f448e2649472d5d960223e8be1420f8d65 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Thu, 6 Jun 2024 11:48:12 -0500 Subject: [PATCH 04/13] Delet the unused constructor of Node class to cheat the coverage check --- MetaMorpheus/EngineLayer/GlycoSearch/Node.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs index 64000438f..7a8ddc918 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs @@ -1,18 +1,11 @@  namespace EngineLayer { - - 
public class Node //The structure of the glycan + /// + /// The structure of the glycan + /// + public class Node { - public Node(char v, Node l, Node r, Node m) - { - Value = v; - LeftChild = l; - RightChild = r; - MiddleChild = m; - Level = null; - } - public Node(char v) { Value = v; From 02bcfce4af115147061283f4454cb72cffe82fa4 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Thu, 6 Jun 2024 17:13:58 -0500 Subject: [PATCH 05/13] Fix the Fdr filter (initial: < 0.01, now <= 0.01) --- MetaMorpheus/EngineLayer/GlycoSearch/Node.cs | 1 + .../PostGlycoSearchAnalysisTask.cs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs index 7a8ddc918..930e33d24 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Node.cs @@ -6,6 +6,7 @@ namespace EngineLayer /// public class Node { + public Node(char v) { Value = v; diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index 0ff7ebc57..fb7e83f6a 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -44,7 +44,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li //This is all psms for all files including glyco- and non-glyco psms.
SingleFDRAnalysis(allPSMs, commonParameters, new List { taskId }); - List filteredGsms = allPSMs.Where(p => p.FdrInfo.QValue < 0.01).ToList(); + List filteredGsms = allPSMs.Where(p => p.FdrInfo.QValue <= 0.01).ToList(); //write individual file results if (Parameters.GlycoSearchParameters.WriteIndividualFiles) @@ -99,9 +99,9 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li // Writing the oglyco results to a file and summary text WriteGlycoFile.WritePsmGlycoToTsv(allPsmsOgly, writtenFileOGlyco, true); //we write this last so localization can be attempted MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsOgly. - Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsOgly - .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); } break; @@ -121,9 +121,9 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_nglyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsNgly, writtenFileNGlyco, true); //we write this last so localization can be attempted MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsNgly. 
//we write the search summary into the Allresult file - Count(p => p.FdrInfo.QValue < 0.01 && !p.IsContaminant && !p.IsDecoy)); + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsContaminant && !p.IsDecoy)); MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsNgly - .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); } break; case GlycoSearchType.N_O_GlycanSearch: @@ -143,9 +143,9 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_no_glyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsgly, writtenFileNOGlyco, true); //we write this last so localization can be attempted MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsgly. - Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsgly - .Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); } break; } @@ -165,7 +165,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var writtenFileSingle = Path.Combine(OutputFolder, "AllPSMs.psmtsv"); WriteGlycoFile.WritePsmGlycoToTsv(filteredGsms, writtenFileSingle, true); MyTaskResults.AddTaskSummaryText("All target PSMs within 1% FDR: " + filteredGsms. 
- Count(p => p.FdrInfo.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); if (Parameters.GlycoSearchParameters.WriteSpectrumLibrary) @@ -311,7 +311,7 @@ private void WriteProteinResults(string outputFolder, string individualFileFolde WriteProteinGroupsToTsv(ProteinGroups, writtenFile, new List { Parameters.SearchTaskId }, qValueCutoff_FORDEBUGONLY); if (myTaskResults is not null) myTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups. - Count(p => p.QValue < 0.01 && !p.IsDecoy && !p.IsContaminant)); + Count(p => p.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); } private void WriteProteinGroupsToTsv(List proteinGroups, string filePath, List nestedIds, double qValueCutoff) From 206fd27739577d670052de49f04c5e37e4d26b07 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Mon, 10 Jun 2024 15:20:04 -0500 Subject: [PATCH 06/13] Try to pass the coverage test, add the decoy filtering test We also allow the same PSMs to be reported from duplicated files. --- .../GlycoSearchTask/GlycoSearchTask.cs | 2 +- MetaMorpheus/Test/TestOGlyco.cs | 121 ++++++++++++++++-- 2 files changed, 112 insertions(+), 11 deletions(-) diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs index 3d3e4ae0f..e3c7b0f6a 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/GlycoSearchTask.cs @@ -150,7 +150,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List(); //For each ms2scan, try to find the best candidate psm from the psms list. Do the localizaiton analysis. Add it into filteredAllPsms.
- foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => p.ScanNumber)) + foreach (var gsmsPerScan in GsmPerScans.GroupBy(p => (p.ScanNumber, p.FullFilePath))) { var glycos = RemoveSimilarSequenceDuplicates(gsmsPerScan.OrderByDescending(p=>p.Score).ToList()); diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 4df269f3f..7bb97e3f5 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -550,14 +550,14 @@ public static void OGlycoTest_Run5() Directory.Delete(outputFolder, true); } - [Test] + [Test] public static void OGlycoTest_Run5_WriteContaminants() { string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); Directory.CreateDirectory(outputFolder); var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); - glycoSearchTask._glycoSearchParameters.WriteContaminants = true; + glycoSearchTask._glycoSearchParameters.WriteContaminants = true; // write contaminants to the output folder DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); @@ -599,7 +599,7 @@ public static void OGlycoTest_Run5_WriteContaminants() //For PSMs var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) - .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C").ToList(); + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message int readInPsmsCount = onePercentPsms1.Count; @@ -609,12 +609,13 @@ public static void OGlycoTest_Run5_WriteContaminants() int 
readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) .Select(line => line.Split('\t')) .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) - && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C"); + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" + && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); //For GlycoPSMs string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects - .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C").ToList(); // the filtering (Q<0.01) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<0.01) int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message @@ -631,6 +632,104 @@ public static void OGlycoTest_Run5_WriteContaminants() Directory.Delete(outputFolder, true); } + [Test] + public static void OGlycoTest_Run5_WriteDecoys() // Test writing decoys, and make sure we can filter the decoys PSMs + { + string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder); + + var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); + glycoSearchTask._glycoSearchParameters.WriteContaminants = true; + glycoSearchTask._glycoSearchParameters.WriteDecoys = true; + glycoSearchTask._glycoSearchParameters.DecoyType = DecoyType.Reverse; + glycoSearchTask.CommonParameters = new 
CommonParameters(dissociationType: DissociationType.HCD, trimMsMsPeaks: false, + precursorMassTolerance: new PpmTolerance(6), productMassTolerance: new PpmTolerance(10), qValueThreshold: 1, + pepQValueThreshold: 1, scoreCutoff: 1); + + DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); + DbForTask dbContaminant = new DbForTask(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P02649.fasta"), true); + + List copiedSpectraFiles = new(); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); // in order to get enough PSMs to test the filtering, we will copy the spectra file 19 times then get one decpys PSMs in the filterPSMs (Fdr <= 0.01, 1 decoys out of 200 target) + for(int i = 0; i < 19 ; i++) + { + var copyPath = Path.Combine(TestContext.CurrentContext.TestDirectory, $@"GlycoTestData\Copy{i}GlycoPepMix_snip.mzML"); + if (!File.Exists(copyPath)) + File.Copy(spectraFile, copyPath); + copiedSpectraFiles.Add(copyPath); + } + + new EverythingRunnerEngine( + new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, + new List { spectraFile, copiedSpectraFiles}, + new List { targetDbForTask, dbContaminant}, + outputFolder).Run(); + + + // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files + // Parse values from results.txt + string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) + .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder + if (resultsTextPath is null) + Assert.Fail("Results file not found."); + string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file + Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file + + //For PSMs + var allPsmLine = 
allResultTxtLines.First(p => p.Contains("target PSMs within")); + int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); + + //For ProteinGroups + var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); + int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); + + //For GlycoPSMs + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file + + //For Level1GlycoPSMs + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file + + // Parse counted number from csv files + + //For PSMs + var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); + List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); + Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message + int readInPsmsCount = onePercentPsms1.Count; + + //For ProteinGroups + var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); + string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); + int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) + .Select(line => line.Split('\t')) + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" //filter the contaminants + && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); // filter the decoys + + //For GlycoPSMs + string oGlycoPath = 
Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); + List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<0.01, decoy and contaminat) + int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 + Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message + + //For Level1GlycoPSMs + int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number + + //Compare the numbers + Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); + Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); + Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); + Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); + + + copiedSpectraFiles.ForEach(p => File.Delete(p)); + Directory.Delete(outputFolder, true); + } + [Test] public static void OGlycoTest_Run6() { @@ -1413,9 +1512,11 @@ public static void TestExperimentalDesignError() Directory.Delete(outputFolder, true); } [Test] - [TestCase(false, 2, 1, 1)] - [TestCase(true, 2, 3, 1)] - [TestCase(true, 2, 3, 2)] + [TestCase(false, 2, 1, 1)] // pre output: 1 intensity column, post output: 2 intensity column + [TestCase(true, 2, 3, 1)] // pre output: 1 intensity column, post output: 6 intensity column + [TestCase(true, 2, 3, 2)] // pre output: 1 intensity column, post output: 12 intensity column + + public static void TestGlycoProteinQuantFileHeaders(bool hasDefinedExperimentalDesign, int bioreps, int fractions, int techreps) { string condition = hasDefinedExperimentalDesign ? 
"TestCondition" : ""; @@ -1465,8 +1566,8 @@ public static void TestGlycoProteinQuantFileHeaders(bool hasDefinedExperimentalD List splitHeader = lines[0].Split(new char[] { '\t' }).ToList(); List intensityColumnHeaders = splitHeader.Where(p => p.Contains("Intensity", StringComparison.OrdinalIgnoreCase)).ToList(); - Assert.That(intensityColumnHeaders.Count == 1); - + Assert.That(intensityColumnHeaders.Count == bioreps* fractions* techreps); // We change the search funtion allowed to get the PSMs from the duplicate file. + // Ex. we have 2 bioreps, 3 fractions, 1 techrep, then we get 6 intensity columns Directory.Delete(outputFolder, true); } [Test] From 8a7f48d1aa694fc6f704df055afd2497d8be01ac Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Tue, 2 Jul 2024 17:06:21 -0500 Subject: [PATCH 07/13] update 7/2/2024 (1) Rewrite the Summary writing function (2)Add the comment in the fuction header --- .../EngineLayer/GlycoSearch/Glycan.cs | 165 ++++++++++++------ .../EngineLayer/GlycoSearch/GlycanBox.cs | 70 +++++--- .../EngineLayer/GlycoSearch/GlycanDatabase.cs | 70 ++++++-- .../EngineLayer/GlycoSearch/GlycoPeptides.cs | 107 +++++++++--- .../GlycoSearch/GlycoSearchEngine.cs | 117 ++++++++----- .../GlycoSearch/GlycoSpectralMatch.cs | 120 +++++++++---- .../GlycoSearch/LocalizationGraph.cs | 107 ++++++++---- .../ModernSearch/ModernSearchEngine.cs | 18 +- .../PostGlycoSearchAnalysisTask.cs | 122 ++++++++----- MetaMorpheus/Test/TestOGlyco.cs | 17 +- 10 files changed, 625 insertions(+), 288 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs index 27c03a9c0..0ee4fe6e4 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs @@ -13,14 +13,14 @@ public class GlycanIon { public GlycanIon(string ionStruct, int ionMass, byte[] ionKind, int lossIonMass) { - IonStruct = ionStruct; + IonStruct = ionStruct; // Always set null, deprecated. 
IonMass = ionMass; IonKind = ionKind; - LossIonMass = lossIonMass; // neutral loss mass + LossIonMass = lossIonMass; // Neutral loss mass = Glycan.Mass - IonMass } public string IonStruct { get; set; } public int IonMass { get; set; } - public int LossIonMass { get; set; }//Glycan.Mass - IonMass + public int LossIonMass { get; set; } public byte[] IonKind { get; set; } } @@ -41,13 +41,13 @@ public Glycan(byte[] kind) Mass = GetMass(kind); } - public int GlyId { get; set; } - public string Struc { get; private set; } + public int GlyId { get; set; } // Glycan ID, which is the index of glycan in the glycan database. + public string Struc { get; private set; } // Glycan structure string represented the glycan structure and linkage. Ex. (N(H(A))(N(H(A))(F))) public int Mass { get; private set; } - //Glycans are composed of several different types of mono saccharides. In Kind, each number correspond to one type of mono saccharide in the same order as Glycan.CharMassDic. - public byte[] Kind { get; private set; } - public string Composition + + public byte[] Kind { get; private set; } // Glycans are composed of several types of mono suagr. In Kind, each number correspond to one type (corresponded order as Glycan.CharMassDic). + public string Composition // Glycan composition string. Ex. H2N2A2F1. { get { @@ -57,10 +57,10 @@ public string Composition public List Ions { get; set; } public bool Decoy { get; private set; } - public HashSet DiagnosticIons //B ions, and there are more ions to set... + public HashSet DiagnosticIons // B ions (the sugar fragment dropped from the glycopeptide), used for the N-glycan. There are more ions to set... { get - { //kind[] is the sugar type composition of glycan, and each index represent the corresponding sugar type. + { HashSet diagnosticIons = new HashSet(); if (Kind[0] >= 1) //if we have Hexose(the number more than one), then we have the corresponding diagonsitic ions as below. 
{ @@ -68,7 +68,7 @@ public string Composition diagnosticIons.Add(11503951 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(16306064 - hydrogenAtomMonoisotopicMass); } - if (Kind[1] >= 1) + if (Kind[1] >= 1) // if we have HexNAc(the number more than one), then we have the corresponding diagonsitic ions as below. { diagnosticIons.Add(12605550 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(13805550 - hydrogenAtomMonoisotopicMass); @@ -77,16 +77,16 @@ public string Composition diagnosticIons.Add(18607663 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(20408720 - hydrogenAtomMonoisotopicMass); } - if (Kind[1] >= 1 && Kind[0] >= 1) + if (Kind[1] >= 1 && Kind[0] >= 1) // if we have HexNAc and Hexose, then we have the corresponding diagonsitic ions as below. { diagnosticIons.Add(36614002 - hydrogenAtomMonoisotopicMass); } - if (Kind[2] >= 1) + if (Kind[2] >= 1) //If we have NeuNAc, then we have the corresponding diagonsitic ions as below. { diagnosticIons.Add(27409268 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(29210324 - hydrogenAtomMonoisotopicMass); } - if (Kind[3] >= 1) + if (Kind[3] >= 1) //If we have NeuNGc, then we have the corresponding diagonsitic ions as below. { diagnosticIons.Add(29008759 - hydrogenAtomMonoisotopicMass); diagnosticIons.Add(30809816 - hydrogenAtomMonoisotopicMass); @@ -105,7 +105,6 @@ public string Composition //H: C6O5H10 Hexose, N: C8O5NH13 HexNAc, A: C11O8NH17 Neu5Ac, G: C11H17NO9 Neu5Gc, F: C6O4H10 Fucose, //P: PO3H Phosphate, S: SO3H Sulfo, Y: Na Sodium, C:Acetyl for Neu5Ac //X: C5H10O5 Xylose - //If add more monosacchrades here, please change GetMass, GetKind, GetKindString, GlycanBox constructor, search byte[]. private readonly static Dictionary CharMassDic = new Dictionary { { 'H', 16205282 }, { 'N', 20307937 }, @@ -119,7 +118,7 @@ public string Composition { 'X', 15005282 }, }; - //Compitable with Byonic, for loading glycan by Kind. + // The corresponding index for sugar and Kind. 
public readonly static Dictionary> NameCharDic = new Dictionary> { {"Hex", new Tuple('H', 0) }, @@ -134,13 +133,15 @@ public string Composition {"Xylose", new Tuple('X', 9) } }; - public readonly static HashSet CommonOxoniumIons = new HashSet //The same ion as we describe above in the diagnostic ions. That just for the initial matching with the gkycan. - {13805550, 16806607, 18607663, 20408720, 36614002 };// some software use the ions to predict verified glycopeptide (pre-filter). + //The same ion as we describe above in the diagnostic ions. That just for the initial filtering for glycopeptide peaks. Not used now. + public readonly static HashSet CommonOxoniumIons = new HashSet + {13805550, 16806607, 18607663, 20408720, 36614002 }; - public readonly static int[] AllOxoniumIons = new int[] //The same ion as we describe above in the diagnostic ions. We didn't use the ions for matching now. + //The same ion as we describe above in the diagnostic ions. Used for building the oxoniumIntensity list. + public readonly static int[] AllOxoniumIons = new int[] {10902895, 11503951, 12605550, 12703952, 13805550, 14406607, 16306064, 16806607, 18607663, 20408720, 27409268, 29008759, 29210324, 30809816, 36614002, 65723544, 67323035}; - //TrimannosylCore is only useful for N-Glyco peptides. + //TrimannosylCore. Only useful for N-Glyco peptides. public readonly static Dictionary TrimannosylCores = new Dictionary { //Each of the mass represent as a N-Glycan core. @@ -160,24 +161,30 @@ public string Composition #region Glycan Structure manipulation - //There are two ways to represent a glycan in string, one is only composition, and the other is included linkage and composition information. - // first one: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1), second one: (N(H(A))(N(H(A))(F))) - - //The method generate a glycan by reading the glycan structure string from database. - // input : (N(H(A))(N(H(A))(F))), output: Glycan object. 
+ //There are two ways to represent a glycan in string + //Composition: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1), + //Struct(Linkage): (N(H(A))(N(H(A))(F))) + + /// + /// Only for Gdb. The method generate a glycan object by reading the glycan structure string from database. + /// + /// structrue string ex. (N(H(A))(N(H(A))(F))) + /// + /// + /// Glycan Object public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglycan = false) { - Node node = Struct2Node(theGlycanStruct); //Transfer string to tree structure. - List nodeIons = GetAllChildrenCombination(node); //Get all possible fragmentation/neutral loss of a glycan. - int mass = Glycan.GetMass(theGlycanStruct); //Get glycan mass. - byte[] kind = Glycan.GetKind(theGlycanStruct); //Get glycan composition, which is a byte array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1]. + Node node = Struct2Node(theGlycanStruct); // String to tree structure. + List nodeIons = GetAllChildrenCombination(node); // Get all possible fragmentation & neutralLoss of a glycan. + int mass = Glycan.GetMass(theGlycanStruct); // Get glycan mass. + byte[] kind = Glycan.GetKind(theGlycanStruct); // Get glycan composition array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1]. List glycanIons = new List(); HashSet ionMasses = new HashSet(); foreach (var aNodeIon in nodeIons) { - var ionMass = Glycan.GetMass(Node2Struct(aNodeIon)); - if (!ionMasses.Contains(ionMass) && ionMass != mass) - { + var ionMass = Glycan.GetMass(Node2Struct(aNodeIon)); // Get the ionMass + if (!ionMasses.Contains(ionMass) && ionMass != mass) // Avoid duplicate ions with the same mass. Ex. N(H)N and N(N(H)) have the same ionMass. + { // We also avoid the ionMass equals to the glycan mass. Because we won't assume the whole glycan is a fragment ion. 
ionMasses.Add(ionMass); var ionKind = Glycan.GetKind(Node2Struct(aNodeIon)); var lossIonMass = GetIonLossMass(kind, ionKind); @@ -189,33 +196,37 @@ public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglyca { glycanIons.Add(new GlycanIon(null, 8303819, new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, mass - 8303819)); //Cross-ring mass } - glycanIons.Add(new GlycanIon(null, 0, kind, mass)); + glycanIons.Add(new GlycanIon(null, 0, kind, mass)); //That is Y0 ion. The whole glycan dropped from the glycopeptide. Like a netural loss. Glycan glycan = new Glycan(theGlycanStruct, mass, kind, glycanIons.OrderBy(p => p.IonMass).ToList(), false); glycan.GlyId = id; return glycan; } - //The function here is to transfer a glycan-string into tree format. (Glycan are represented in tree structures composed of Node) - //input: (N(H)), output: Node(N, 0) -> left Child = Node(H, 1) + + /// + /// Convert the glycan structure string to tree format + /// + /// linkage inforamtion ex. (N(H)) + /// glycan tree node ex. Current Nonde = Node(N, 0), left Child = Node(H, 1) public static Node Struct2Node(string theGlycanStruct) { int level = 0; - Node curr = new Node(theGlycanStruct[1], level);//The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root. - for (int i = 2; i < theGlycanStruct.Length - 1; i++) //try to extract the following characters. + Node curr = new Node(theGlycanStruct[1], level); // The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root. + for (int i = 2; i < theGlycanStruct.Length - 1; i++) // Try to extract the following characters. { - if (theGlycanStruct[i] == '(') //skip the '(' character. + if (theGlycanStruct[i] == '(') // Skip the '(' character. { continue; } - if (theGlycanStruct[i] == ')')//when we meet a ')', we need to go back to the father node. 
+ if (theGlycanStruct[i] == ')') // When we meet a ')', we need to go back to the parent node. { curr = curr.Father; level--; } - else // when we meet a character, we need to decide where to put it in the tree. (putting priority: left -> right side -> middle) + else // While meeting a character, we need to decide where to put it in the tree. (putting priority: left -> right side -> middle) { - level++; //first, move to the next level.(Deeper level) + level++; // Move to the level.(Deeper/Child level) if (curr.LeftChild == null) { curr.LeftChild = new Node(theGlycanStruct[i], level); @@ -237,12 +248,16 @@ public static Node Struct2Node(string theGlycanStruct) } } } - return curr; // return the root of the tree. + return curr; } - //The function is to generate all possible fragmentation/neutral loss of a glycan, which is a subset of glycan. - //Node is tree structured glycan. subset of glycans are also represented by Node. + + /// + /// Generate all possible fragments(subset) of a glycan. The fragments are also represented by a Node. + /// + /// + /// The all combination of the Glycan fragment. Presented by Node private static List GetAllChildrenCombination(Node node) { List nodes = new List(); @@ -380,7 +395,12 @@ private static string Node2Struct(Node node) return output; } - //kind are compositions of glycan. The function here is to generate mass difference of two glycan. + /// + /// Calculate the mass difference of two glycan kind. + /// + /// Composition of the glycan + /// Composition of the glycanIon + /// Mass different between the glycan and its glycanIon public static int GetIonLossMass(byte[] Kind, byte[] ionKind) { byte[] lossKind = new byte[Kind.Length]; @@ -394,8 +414,12 @@ public static int GetIonLossMass(byte[] Kind, byte[] ionKind) #endregion #region Transfer information - - private static int GetMass(string structure) //Get glycan mass by glycan structure string. 
structure format : (N(H(A))(N(H(A))(F))) + /// + /// Get glycan mass by glycan structure string + /// + /// ex.(N(H(A))(N(H(A))(F))) + /// The glycan Mass + private static int GetMass(string structure) { int y = CharMassDic['H'] * structure.Count(p => p == 'H') + CharMassDic['N'] * structure.Count(p => p == 'N') + @@ -411,7 +435,12 @@ private static int GetMass(string structure) //Get glycan mass by glycan structu return y; } - public static int GetMass(byte[] kind) //Get glycan mass by glycan composition. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] + /// + /// Get glycan mass by glycan composition + /// + /// [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] + /// The glycan mass + public static int GetMass(byte[] kind) { int mass = CharMassDic['H'] * kind[0] + CharMassDic['N'] * kind[1] + @@ -428,7 +457,13 @@ public static int GetMass(byte[] kind) //Get glycan mass by glycan composition. return mass; } - public static byte[] GetKind(string structure) //Get glycan composition by the structure string. structure format : (N(H(A))(N(H(A))(F))), output : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] + + /// + /// Get glycan composition by the structure string + /// + /// structure format : (N(H(A))(N(H(A))(F))) + /// The kind List ex [2, 2, 2, 0, 1, 0, 0, 0, 0, 0]. + public static byte[] GetKind(string structure) { var kind = new byte[] { Convert.ToByte(structure.Count(p => p == 'H')), @@ -445,7 +480,13 @@ public static byte[] GetKind(string structure) //Get glycan composition by the s return kind; } - public static string GetKindString(byte[] Kind)//Get glycan composition by the kind[]. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0], output is H2N2A2F1. + + /// + /// Get glycan composition text from the glycan kind[]. + /// + /// ex. [2, 2, 2, 0, 1, 0, 0, 0, 0, 0] + /// The composition text ex. H2N2A2F1 + public static string GetKindString(byte[] Kind) { string H = Kind[0]==0 ? "" : "H" + Kind[0].ToString(); string N = Kind[1] == 0 ? 
"" : "N" + Kind[1].ToString(); @@ -465,6 +506,12 @@ public static string GetKindString(byte[] Kind)//Get glycan composition by the k //TO THINK: Is it reasonable to transfer Glycan to Modification the first time Glycan is read in? Which could save time. //Use glycan index and modification index to reduce space. + + /// + /// Input the N-glycan object, and transfer it to the modification object. + /// + /// + /// public static Modification NGlycanToModification(Glycan glycan) { Dictionary> neutralLosses = new Dictionary>(); @@ -494,6 +541,11 @@ public static Modification NGlycanToModification(Glycan glycan) return modification; } + /// + /// Input the O-glycan object, and transfer it to the modification object. + /// + /// + /// The modification object public static Modification OGlycanToModification(Glycan glycan) //try to transfer the glycan object to modification object. { //TO THINK: what the neutralLoss for O-Glyco? @@ -528,14 +580,13 @@ public static Modification OGlycanToModification(Glycan glycan) //try to transfe #region Combination or Permutation functions not directly related to glycan, use carefully these function don't deal duplicate elements. + public static IEnumerable> GetKCombs(IEnumerable list, int length) where T : IComparable { - if (length == 1) return list.Select(t => new T[] { t }); + if (length == 1) return list.Select(t => new T[] { t }); // Return the list of the single element. return GetKCombs(list, length - 1).SelectMany(t => list.Where(o => o.CompareTo(t.Last()) > 0), (t1, t2) => t1.Concat(new T[] { t2 })); } - // Try to create the combination with the list, and repeptitation is allowed. 
- // List is the base list, the length is the length for combination public static IEnumerable> GetKCombsWithRept(IEnumerable list, int length) where T : IComparable { if (length == 1) return list.Select(t => new T[] { t }); @@ -546,7 +597,7 @@ public static IEnumerable> GetPermutations(IEnumerable list { if (length == 1) { - return list.Select(t => new T[] { t }); + return list.Select(t => new T[] { t }); } return GetPermutations(list, length - 1).SelectMany(t => list.Where(o => !t.Contains(o)), (t1, t2) => t1.Concat(new T[] { t2 })); } @@ -561,6 +612,12 @@ public static IEnumerable> GetPermutationsWithRept(IEnumerable #region Functions are not used now, could be useful in the future. + /// + /// Test the equality of two glycan objects. Including the glycan mass and the glycan ions should be totally indentical. + /// + /// + /// + /// public static bool Equals(Glycan glycan1, Glycan glycan2) { if (glycan1.Mass == glycan2.Mass) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs index 52f44a785..845d7bafc 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs @@ -9,18 +9,21 @@ namespace EngineLayer { - //One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization. - //GlycanBox -- A defined combination of glycans will be considered to modify on one peptide. - //The GlycanBoxMass is the total mass of all glycans on the peptide - //For example, if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3]) - //By the way, the GlycanBox will be first step in the search, the parameter (Max glycan num in peptide) will be used to limit the capacity of the list. + + /// + /// A defined combination of glycans to modify on one peptide. Ex. 
if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3]) + /// public class GlycanBox:ModBox { - public static Glycan[] GlobalOGlycans { get; set; } + public static Glycan[] GlobalOGlycans { get; set; } // The glycan list in the database file + + public GlycanBox[] ChildGlycanBoxes { get; set; } // all possible glycan combinations in the glycanBox public static Modification[] GlobalOGlycanModifications { get; set; } - public static GlycanBox[] OGlycanBoxes { get; set; } + public static GlycanBox[] OGlycanBoxes { get; set; } // all possible glycan boxes + + public byte[] Kind { get; private set; } //TO DO: Decoy O-glycan can be created, but the results need to be reasoned. //public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 }; @@ -33,7 +36,11 @@ public class GlycanBox:ModBox }; - //After O-glycans are read in from database, we build combinations of glycans into GlycanBox. The maxNum is maximum glycans allowed on one peptides. + /// + /// Use the glycan from database to create all possible combination glycan set into GlycanBox. + /// + /// The maxNum is maximum glycans allowed on one peptides + /// The glycanBox collection, glycanBox[] public static IEnumerable BuildOGlycanBoxes(int maxNum) { return BuildOGlycanBoxes(maxNum, false); @@ -62,8 +69,11 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec } } - //After O-glycans are read in from database, we transfer the glycans into 'Modification' class type for MetaMorpheus to manipulate sequences. - //In the future we may able to combine the two type together. + /// + /// Convert the glycan into Modification type for MetaMorpheus to manipulate sequences. In the future we may able to combine the two type together. 
+ /// + /// + /// public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGlycans) { Modification[] globalOGlycanModifications = new Modification[globalOGlycans.Length]; @@ -75,20 +85,26 @@ public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGly return globalOGlycanModifications; } - //The function here is to build GlycanBoxes used for LocalizationGraph. - //In LocalizationGraph matrix, for each AdjNode, it represent a ChildOGlycanBox here at certain glycosite. + + /// + /// Generate all possible child/fragment box of the specific glycanBox. The childBoxes is uesd for LocalizationGraph. + /// + /// + /// The glycanBox, ex. [0,0,1] means glycan0 + glycan0 + glycan1 + /// + /// The ChildBox collection, ChildBox[] public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] glycanIds, bool targetDecoy = true) { yield return new GlycanBox(new int[0], targetDecoy); HashSet seen = new HashSet(); for (int i = 1; i <= maxNum; i++) { - foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) - { - List ids = new List(); - foreach (var id in idCombine) + foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) //get all combinations of glycans on the peptide, ex. we have three glycosite and three glycan maybe on that (A,B,C) + { //the combination of glycans on the peptide can be (A),(A+B),(A+C),(B+C),(A+B+C) totally six + List ids = new List(); + foreach (var id in idCombine) { - ids.Add(glycanIds[id]); + ids.Add(glycanIds[id]); } if (!seen.Contains(string.Join(",", ids.Select(p => p.ToString())))) @@ -104,14 +120,19 @@ public static IEnumerable BuildChildOGlycanBoxes(int maxNum, int[] gl } } + /// + /// Constructor of GlycanBox. 
+ /// + /// The glycanBox composition, each number represent one glycan index in the database + /// public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) { byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - foreach (var id in ModIds) + foreach (var id in ModIds) //ModIds is the same as ids. { - for (int i = 0; i < kind.Length; i++) + for (int i = 0; i < kind.Length; i++) { - kind[i] += GlobalOGlycans[id].Kind[i]; + kind[i] += GlobalOGlycans[id].Kind[i]; //kind is the sum of all glycan Kind in the Box. } } Kind = kind; @@ -127,18 +148,13 @@ public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) Mass = (double)(Glycan.GetMass(Kind) + SugarShift[shiftInd]) / 1E5; } } - - public GlycanBox[] ChildGlycanBoxes { get; set; } - - public string GlycanIdString + + public string GlycanIdString // the composition of glycanBox. Example: [1,2,3] means glycan1 + glycan2 + glycan3 are on the peptide. { get { return string.Join(",", ModIds.Select(p => p.ToString())); } } - - public byte[] Kind{ get; private set; } - } } diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs index 27c8cd5a0..b716fb081 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs @@ -7,9 +7,17 @@ namespace EngineLayer { // in our database, the N-glycan.gdb should be correct to the new format + // the class for loading glycan database then creeat the glycan object. public static class GlycanDatabase { - //Load Glycan from the database file (located in the Glycan_Mod). Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions. + + /// + /// Load Glycan from the database file. Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions + /// + /// Database file path + /// Do we need to generate the glycanIon? 
+ /// + /// A glycan object collection public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch) { bool isKind = true; @@ -18,7 +26,7 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon while(lines.Peek() != -1) { string line = lines.ReadLine(); - if (!line.Contains("HexNAc")) //use the first line to determine the type of glycan database. + if (!line.Contains("HexNAc")) // use the first line to determine the format (kind / structure) of glycan database. { isKind = false; } @@ -32,11 +40,18 @@ public static IEnumerable LoadGlycan(string filePath, bool ToGenerateIon } else { - return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A)) + return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A)) } } - //Load KindGlycan. Compatible with Byonic. + + /// + /// Load composition format Glycan database, then convert to kind format followed by generating the glycan object. + /// + /// + /// + /// + /// The glycan collection public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch) { using (StreamReader lines = new StreamReader(filePath)) @@ -46,14 +61,14 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat { string line = lines.ReadLine().Split('\t').First(); - if (!(line.Contains("HexNAc") || line.Contains("Hex"))) + if (!(line.Contains("HexNAc") || line.Contains("Hex"))) // Make sure the line is a glycan line. The line should contain HexNAc or Hex. { continue; } - var kind = String2Kind(line); // convert the database string to kind[] format (byte array). + var kind = String2Kind(line); // Convert the database string to kind[] format (byte array). - var glycan = new Glycan(kind); // use the kind[] to create a glycan object. + var glycan = new Glycan(kind); // Use the kind[] to create a glycan object. 
glycan.GlyId = id++; if (ToGenerateIons) { @@ -71,8 +86,11 @@ public static IEnumerable LoadKindGlycan(string filePath, bool ToGenerat } } - //Convert the string to byte array. - //Input example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1), Output example: [2, 5, 0, 0, 1, 0, 0, 0, 0, 1] + /// + /// Convert the glycan string to Kind array + /// + /// ex. HexNAc(2)Hex(5)NeuAc(1)Fuc(1) + /// The glycan Kind List ex. [2, 5, 0, 0, 1, 0, 0, 0, 0, 1] public static byte[] String2Kind(string line) { byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -87,7 +105,12 @@ public static byte[] String2Kind(string line) return kind; } - //Load structured Glycan database. + /// + /// Load structured format Glycan database and generate the glycan object. + /// + /// + /// + /// The Glycan object collection public static IEnumerable LoadStructureGlycan(string filePath, bool IsOGlycan) { using (StreamReader glycans = new StreamReader(filePath)) @@ -95,7 +118,7 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG int id = 1; while (glycans.Peek() != -1) { - string line = glycans.ReadLine(); + string line = glycans.ReadLine(); // Read the line from the database file. Ex. (N(H(A))(A)) yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); // Directly convert the string to Glycan object. } } @@ -367,6 +390,7 @@ private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count, //This function build fragments based on the general core of OGlyco fragments. //From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py //The fragment generation is not as good as structure based method. So it is better to use a structure based O-Glycan database. + // We don't use this function now, alternatively, we use the 'OGlycanCompositionCombinationChildIons'. 
public static List OGlycanCompositionFragments(byte[] kind) { List glycanIons = new List(); @@ -476,7 +500,11 @@ public static List OGlycanCompositionFragments(byte[] kind) return glycanIons; } - //The OGlycanCompositionFragments just generate some core GlycanIons. We need a combination solution. + /// + /// Generate some child ions based on the kind array. The kind array is the combination of the monosaccharides then filter by the rules. + /// + /// glycan Kind[] + /// The glycanIon collection public static List OGlycanCompositionCombinationChildIons(byte[] kind) { List glycanIons = new List(); @@ -491,7 +519,7 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind foreach (var k in _kinds) { - //Rules to build OGlycan child ions. + //Rules to build OGlycan child ions. Filter the kind array which doesn't meet the rules. //At least one HexNAc if (k[1] == 0) { @@ -518,15 +546,21 @@ public static List OGlycanCompositionCombinationChildIons(byte[] kind return glycanIons.OrderBy(p=>p.IonMass).ToList(); } - private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys) - { - if (kind.Sum(p=>p) == 0) + /// + /// Try to create all possible combinations from the glycan kind[]. And store the combination array in the _kinds list. + /// + /// ex. [2,2,0] + /// + /// + private static void _GetCombinations(byte[] kind, List _kinds, HashSet _keys) + { + if (kind.Sum(p=>p) == 0) { - return; + return; // if we don't have any monosaccharide, no need to generate the child ions. 
} else { - for (int i = 0; i < kind.Length; i++) + for (int i = 0; i < kind.Length; i++) //traverse the kind array { if (kind[i] >= 1) { diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs index 0373cd838..b77384e7a 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs @@ -11,7 +11,13 @@ namespace EngineLayer.GlycoSearch { public static class GlycoPeptides - { // a little bit confused part..., I guess the function will generate a list of isotopic intesitry of the oxonium ions. + { + /// + /// Generate a list of isotopic intesitry of the oxonium ions + /// + /// The MS2 Scan + /// + /// int[], The intensity list public static double[] ScanOxoniumIonFilter(Ms2ScanWithSpecificMass theScan, MassDiffAcceptor massDiffAcceptor) { double[] oxoniumIonsintensities = new double[Glycan.AllOxoniumIons.Length]; @@ -180,7 +186,7 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType, return true; } - if (dissociationType == DissociationType.Custom ) + if (dissociationType == DissociationType.Custom ) //Use the fragment type to determine the dissociation type. { if (customIons.Contains(ProductType.zDot) || customIons.Contains(ProductType.c)) { @@ -192,13 +198,22 @@ public static bool DissociationTypeContainETD(DissociationType dissociationType, } //TO THINK: filter reasonable fragments here. The final solution is to change mzLib.Proteomics.PeptideWithSetModifications.Fragment + + /// + /// Get the theoretical fragments of the peptide with the glycan modification. With different dissociation type, the fragment ions are different. 
+ /// + /// + /// + /// + /// + /// product[], Fragments list public static List OGlyGetTheoreticalFragments(DissociationType dissociationType, List customIons, PeptideWithSetModifications peptide, PeptideWithSetModifications modPeptide) { List theoreticalProducts = new List(); HashSet masses = new HashSet(); List products = new List(); - if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID) + if (dissociationType == DissociationType.HCD || dissociationType == DissociationType.CID) { List diag = new List(); modPeptide.Fragment(dissociationType, FragmentationTerminus.Both, diag); @@ -241,7 +256,7 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci } - foreach (var fragment in products) + foreach (var fragment in products) //this part just for the unique fragment ions. (filter the fragment with the same neturalMass) { if (!masses.Contains(fragment.NeutralMass)) { @@ -253,23 +268,31 @@ public static List OGlyGetTheoreticalFragments(DissociationType dissoci return theoreticalProducts; } + + /// + /// Generate the theroertical glycan modified peptide. With the glycanBox, modPos, and the peptide. + /// + /// + /// + /// + /// A modfiied peptide. public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theModPositions, PeptideWithSetModifications peptide, GlycanBox glycanBox) { Modification[] modifications = new Modification[glycanBox.NumberOfMods]; for (int i = 0; i < glycanBox.NumberOfMods; i++) { - modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)]; + modifications[i] = GlycanBox.GlobalOGlycanModifications[glycanBox.ModIds.ElementAt(i)]; // transfer the glycanBox information to a new list. } Dictionary testMods = new Dictionary(); foreach (var mod in peptide.AllModsOneIsNterminus) { - testMods.Add(mod.Key, mod.Value); + testMods.Add(mod.Key, mod.Value); // transfer the AllMod information to a new list. 
} for (int i = 0; i < theModPositions.Count(); i++) { - testMods.Add(theModPositions.ElementAt(i), modifications[i]); + testMods.Add(theModPositions.ElementAt(i), modifications[i]); //combine the glycanBox information to the AllMod list } var testPeptide = new PeptideWithSetModifications(peptide.Protein, peptide.DigestionParams, peptide.OneBasedStartResidue, @@ -278,6 +301,12 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(int[] theMod return testPeptide; } + /// + /// Generate the theroertical glycan modified peptide. With the route the peptide. Because the route contains the glycanBox and modPos information. + /// + /// + /// + /// A modfiied peptide public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theModPositions, PeptideWithSetModifications peptide) { Modification[] modifications = new Modification[theModPositions.Mods.Count]; @@ -303,16 +332,24 @@ public static PeptideWithSetModifications OGlyGetTheoreticalPeptide(Route theMod return testPeptide; } - //The function here is to calculate permutation localization which could be used to compare with Graph-Localization. + //Should be revised for easier understanding. + /// + /// Generate all possible glycosite for the glycan set. Supposed we will put the glycan on the glycosite in sequence. + /// + /// Ex. [3,5,2,7] + /// Ex. [2,2,3] means id2 + id2 + id3 + /// A glycosite set collection. Ex. ([2,5,7],[3,5,7]...), each one list means the glcosites for glycanBox. + /// [2,5,7] means we will put the glycan on position 2, 5, 7. + /// public static List GetPermutations(List allModPos, int[] glycanBoxId) { var length = glycanBoxId.Length; - var indexes = Enumerable.Range(0, length).ToArray(); + var indexes = Enumerable.Range(0, length).ToArray(); // just the index for the glycanBoxId to keep the order. 
int[] orderGlycan = new int[length]; - List permutateModPositions = new List(); + List permutateModPositions = new List(); //The list to store all possible permutation localization. - var combinations = Glycan.GetKCombs(allModPos, length); + var combinations = Glycan.GetKCombs(allModPos, length); //Get all possible combinations of the mod sites. ex. four site[1,2,3,4], length:3 -> combination [1,2,3], [1,2,4], [1,3,4], [2,3,4] foreach (var com in combinations) { @@ -332,7 +369,7 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId orderGlycan[i] = glycanBoxId[indexes[i]]; } var key = string.Join(",", orderGlycan.Select(p => p.ToString())); - if (!keys.Contains(key)) + if (!keys.Contains(key)) //Remove the duplicate permutation localization. { keys.Add(key); permutateModPositions.Add(per.ToArray()); @@ -343,25 +380,32 @@ public static List GetPermutations(List allModPos, int[] glycanBoxId return permutateModPositions; } - //The purpose of the funtion is to generate hash fragment ions without generate the PeptideWithMod. keyValuePair key:GlycanBoxId, Value:mod sites + + /// + /// Generate the new fragment list, we add the glycan mass to the c ions and z ions from the peptide fragment list + /// + /// + /// + /// + /// + /// public static int[] GetFragmentHash(List products, Tuple keyValuePair, GlycanBox[] OGlycanBoxes, int FragmentBinsPerDalton) { - double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray(); + double[] newFragments = products.OrderBy(p=>p.ProductType).ThenBy(p=>p.FragmentNumber).Select(p => p.NeutralMass).ToArray(); // store the fragment mass in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3 var len = products.Count / 3; if (keyValuePair.Item2!=null) { - for (int i = 0; i < keyValuePair.Item2.Length; i++) - { + for (int i = 0; i < keyValuePair.Item2.Length; i++) // we want to add the glycan mass to the c ions and z ions that contain the glycan. 
+ { // y ions didn't change in EThcD for O-glyco, so we just need to deal with c ions and z ions. var j = keyValuePair.Item2[i]; - while (j <= len + 1) + while (j <= len + 1) // for c ions { newFragments[j - 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5; j++; } - j = keyValuePair.Item2[i]; - while (j >= 3) + j = keyValuePair.Item2[i]; // reset the j to the position of the glycan + while (j >= 3) // for z ions { - //y ions didn't change in EThcD for O-glyco newFragments[len * 3 - j + 2] += (double)GlycanBox.GlobalOGlycans[OGlycanBoxes[keyValuePair.Item1].ModIds[i]].Mass/1E5; j--; } @@ -369,7 +413,7 @@ public static int[] GetFragmentHash(List products, Tuple ke } - int[] fragmentHash = new int[products.Count]; + int[] fragmentHash = new int[products.Count]; // store the fragment mass in the order of c1, c2, c3, y1, y2, y3, z1, z2, z3 and with the umit of FragmentBinsPerDalton for (int i = 0; i < products.Count; i++) { fragmentHash[i] = (int)Math.Round(newFragments[i] * FragmentBinsPerDalton); @@ -377,8 +421,16 @@ public static int[] GetFragmentHash(List products, Tuple ke return fragmentHash; } - //Find FragmentHash for current box at modInd. - //y-ion didn't change for O-Glycopeptide. + + /// + /// Generate the fragment list with the specific childBox located on specific modPos. At here, the ModInd is the index for modPos. Not used in the current version. + /// + /// + /// ModPos list + /// Specific ModPos, index in ModPos + /// Whole glycanBox + /// Partial glycanBox, at here is the childBox + /// public static List GetLocalFragment(List products, int[] modPoses, int modInd, ModBox OGlycanBox, ModBox localOGlycanBox) { List newFragments = new List(); @@ -433,8 +485,12 @@ public static List GetUnlocalFragment(List products, int[] modP } - //The oxoniumIonIntensities is related with Glycan.AllOxoniumIons. - //Rules are coded in the function. + /// + /// The oxoniumIonIntensities is related with Glycan.AllOxoniumIons. 
Filter the invalid data. + /// + /// + /// + /// True : is Oglycan and pass the filter, False: isn't Oglycan and not pass the filter public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBox glycanBox) { //If a glycopeptide spectrum does not have 292.1027 or 274.0921, then remove all glycans that have sialic acids from the search. @@ -458,6 +514,7 @@ public static bool OxoniumIonsAnalysis(double[] oxoniumIonsintensities, GlycanBo //Other rules: //A spectrum needs to have 204.0867 to be considered as a glycopeptide. //Ratio of 138.055 to 144.0655 can seperate O/N glycan. + // use some other oxonium ions to determine the glycan type. return true; } diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs index 32b48dda0..31e7208df 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSearchEngine.cs @@ -15,13 +15,13 @@ namespace EngineLayer.GlycoSearch public class GlycoSearchEngine : ModernSearchEngine { public static readonly double ToleranceForMassDifferentiation = 1e-9; - private readonly int OxoniumIon204Index = 9; //Check Glycan.AllOxoniumIons - protected readonly List[] GlobalCsms; + private readonly int OxoniumIon204Index = 9; // Check Glycan.AllOxoniumIons + protected readonly List[] GlobalCsms; // Why don't we call it GlobalGsms? private GlycoSearchType GlycoSearchType; - private readonly int TopN; + private readonly int TopN; // DDA top Peak number. private readonly int _maxOGlycanNum; - private readonly bool OxoniumIonFilter; //To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc). + private readonly bool OxoniumIonFilter; // To filt Oxonium Ion before searching a spectrum as glycopeptides. If we filter spectrum, it must contain oxonium ions such as 204 (HexNAc). 
private readonly string _oglycanDatabase; private readonly string _nglycanDatabase; @@ -77,6 +77,15 @@ public GlycoSearchEngine(List[] globalCsms, Ms2ScanWithSpeci private Glycan[] NGlycans { get; } //private Glycan[] DecoyGlycans { get; } + /// + /// Run the glycoSearchEngine, the main function for the glycoSearchEngine. + /// Four steps: + /// (1) run a modern search engine to get the peptide candidates. + /// (2) match the peptide candidates with the precursor mass. + /// (3) use the mass shift to generate the route for the glycan localization. + /// (4) evaluate the highest score for the glycan localization and generate the glycoSpectralMatch. + /// + /// SearchResult protected override MetaMorpheusEngineResults RunSpecific() { double progress = 0; @@ -85,14 +94,14 @@ protected override MetaMorpheusEngineResults RunSpecific() byte byteScoreCutoff = (byte)CommonParameters.ScoreCutoff; - int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; + int maxThreadsPerFile = CommonParameters.MaxThreadsToUsePerFile; // MaxThreads = deafult is 7. int[] threads = Enumerable.Range(0, maxThreadsPerFile).ToArray(); // We can do the parallel search on different threads Parallel.ForEach(threads, (scanIndex) => { byte[] scoringTable = new byte[PeptideIndex.Count]; List idsOfPeptidesPossiblyObserved = new List(); - byte[] secondScoringTable = new byte[PeptideIndex.Count]; + byte[] secondScoringTable = new byte[PeptideIndex.Count]; // We didn't use that right now. List childIdsOfPeptidesPossiblyObserved = new List(); List idsOfPeptidesTopN = new List(); @@ -111,7 +120,7 @@ protected override MetaMorpheusEngineResults RunSpecific() var scan = ListOfSortedMs2Scans[scanIndex]; - // get fragment bins for this scan + // get fragment bins for this scan List allBinsToSearch = GetBinsToSearch(scan, FragmentIndex, CommonParameters.DissociationType); //Limit the high bound limitation, here assume it is possible to has max 3 Da shift. 
This allows for correcting precursor in the future. @@ -147,23 +156,23 @@ protected override MetaMorpheusEngineResults RunSpecific() // } //} - // done with indexed scoring; refine scores and create PSMs - if (idsOfPeptidesPossiblyObserved.Any()) + // filtering the peptides candidate with the cufoff and limit the topN peptides. + if (idsOfPeptidesPossiblyObserved.Any()) { scoreAtTopN = 0; peptideCount = 0; - foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p])) + foreach (int id in idsOfPeptidesPossiblyObserved.OrderByDescending(p => scoringTable[p])) //from the higest score to the lowest score { - if (scoringTable[id] < (int)byteScoreCutoff) + if (scoringTable[id] < (int)byteScoreCutoff) //if the score is lower than the cutoff, we can skip this peptide. { continue; } peptideCount++; if (peptideCount == TopN) { - scoreAtTopN = scoringTable[id]; + scoreAtTopN = scoringTable[id]; //ScoreAtTopN = The score of the last peptide in the TopN list. } - if (scoringTable[id] < scoreAtTopN) + if (scoringTable[id] < scoreAtTopN) { break; } @@ -174,7 +183,7 @@ protected override MetaMorpheusEngineResults RunSpecific() if (GlycoSearchType == GlycoSearchType.OGlycanSearch) { - gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff); + gsms = FindOGlycopeptideHashLocal(scan, idsOfPeptidesTopN, scanIndex, (int)byteScoreCutoff); // Use the peptide candidate and the scan to generate the gsms. } else if(GlycoSearchType == GlycoSearchType.NGlycanSearch) { @@ -194,7 +203,7 @@ protected override MetaMorpheusEngineResults RunSpecific() if (GlobalCsms[scanIndex] == null) { - GlobalCsms[scanIndex] = new List(); + GlobalCsms[scanIndex] = new List(); //the first one finished task, create teh new gsms list. } else { @@ -214,11 +223,11 @@ protected override MetaMorpheusEngineResults RunSpecific() { oldPercentProgress = percentProgress; ReportProgress(new ProgressEventArgs(percentProgress, "Performing glyco search... 
" + CurrentPartition + "/" + CommonParameters.TotalPartitions, NestedIds)); - } + } //percentProgress = 100, "Performing glyco search...1/1", NestedIds = 3. } }); - return new MetaMorpheusEngineResults(this); + return new MetaMorpheusEngineResults(this); //Storage the result information into the result class. } private void Add2GlobalGsms(ref List gsms, int scanIndex) @@ -230,11 +239,11 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex) foreach (var gsm in gsms.Where(p => p != null).OrderByDescending(p => p.Score).ThenBy(c => c.FullSequence)) { - if (gsmsCount <= 10) + if (gsmsCount <= 10) { - gsm.ResolveAllAmbiguities(); + gsm.ResolveAllAmbiguities(); //Try to resolve any case that have the same sequence in the PSM. - if (gsmsCount == 1) + if (gsmsCount == 1) //If the gsms number is 1, we don't need to check the score and sequence. { preScore = gsm.Score; preString = gsm.FullSequence; @@ -242,17 +251,17 @@ private void Add2GlobalGsms(ref List gsms, int scanIndex) GlobalCsms[scanIndex].Add(gsm); gsmsCount++; } - else + else { - if (gsm.Score - preScore < ToleranceForMassDifferentiation && + if (gsm.Score - preScore < ToleranceForMassDifferentiation && gsm.Score - preScore > -ToleranceForMassDifferentiation) { string currentString = gsm.FullSequence; - if (preString == currentString) + if (preString == currentString) //If peptides have the same sequence and their score is almost the same { - foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods) - { + foreach ((int, PeptideWithSetModifications Peptide) bestMatchPeptide in gsm.BestMatchingBioPolymersWithSetMods) // We should add tje new ProteinMatch to the gsm. + { // Because the indentical sequence may from the different protein. 
GlobalCsms[scanIndex].Last().AddProteinMatch(bestMatchPeptide, gsm.BioPolymersWithSetModsToMatchingFragments[bestMatchPeptide.Peptide]); } @@ -355,6 +364,15 @@ private GlycoSpectralMatch CreateGsm(Ms2ScanWithSpecificMass theScan, int scanIn return psmGlyco; } + /// + /// If the peptide mass is perfectly match with the precursor mass, we can directly generate the gsms for the peptide. Store the gsms into the possibleMatches. + /// + /// + /// + /// + /// The peptide candidate + /// + /// The space to store the gsms private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, ref List possibleMatches) { List products = new List(); @@ -371,7 +389,17 @@ private void FindSingle(Ms2ScanWithSpecificMass theScan, int scanIndex, int scor } } - // + /// + /// Match the mass of the peptide candidate with the precursor mass. Try to generate the Gsms for the Scan. Gsms will be stored in the possibleMatches. + /// + /// + /// + /// + /// peptide candidate + /// + /// The precursor mass + /// + /// The space to store the gsms private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int scoreCutOff, PeptideWithSetModifications theScanBestPeptide, int ind, double possibleGlycanMassLow, double[] oxoniumIonIntensities, ref List possibleMatches) { int iDLow = GlycoPeptides.BinarySearchGetIndex(GlycanBox.OGlycanBoxes.Select(p => p.Mass).ToArray(), possibleGlycanMassLow); // try to find the index that closet match to the "possibleGlycanMassLow" within the glycanBox @@ -407,24 +435,24 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco List localizationGraphs = new List(); // if we also have ETD, then we will search the localization - while (iDLow < GlycanBox.OGlycanBoxes.Count() && (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) + while (iDLow < GlycanBox.OGlycanBoxes.Count() && 
(PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass + GlycanBox.OGlycanBoxes[iDLow].Mass))) // verify the glycan mass is invaild (within the range and match with mass shift) { - if (OxoniumIonFilter && !GlycoPeptides.OxoniumIonsAnalysis(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow])) + if (OxoniumIonFilter && !GlycoPeptides.OxoniumIonsAnalysis(oxoniumIonIntensities, GlycanBox.OGlycanBoxes[iDLow])) // if the filter is turned on, we need to check does the oxoiums make sense. { - iDLow++; + iDLow++; // if the oxonium ions don't make sense (there is no 204, or without their diagnostic ion), we can skip this glycan. continue; } - if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods) + if (modPos.Length >= GlycanBox.OGlycanBoxes[iDLow].NumberOfMods) // the glycosite number should be larger than the possible glycan number. { LocalizationGraph localizationGraph = new LocalizationGraph(modPos, GlycanBox.OGlycanBoxes[iDLow], GlycanBox.OGlycanBoxes[iDLow].ChildGlycanBoxes, iDLow); - LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products); + LocalizationGraph.LocalizeOGlycan(localizationGraph, localizationScan, CommonParameters.ProductMassTolerance, products); //create the localization graph with the glycan mass and the possible glycosite. double currentLocalizationScore = localizationGraph.TotalScore; - if (currentLocalizationScore > bestLocalizedScore) + if (currentLocalizationScore > bestLocalizedScore) //Try to find the best glycanBox with the highest score. { bestLocalizedScore = currentLocalizationScore; localizationGraphs.Clear(); - localizationGraphs.Add(localizationGraph); + localizationGraphs.Add(localizationGraph); // we only keep the best glycanBox and its localizationgraph. 
} else if ((is_HCD_only_data || bestLocalizedScore > 0) && (currentLocalizationScore <= bestLocalizedScore + 0.00000001 && currentLocalizationScore >= bestLocalizedScore - 0.00000001)) { @@ -438,10 +466,10 @@ private void FindOGlycan(Ms2ScanWithSpecificMass theScan, int scanIndex, int sco //In theory, the peptide_localization shouldn't be null, but it is possible that the real score is smaller than indexed score. if (localizationGraphs.Count > 0) { - var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes); - var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath); + var firstPath = LocalizationGraph.GetFirstPath(localizationGraphs[0].array, localizationGraphs[0].ChildModBoxes); //Get the first path from the localization graph. + var localizationCandidate = LocalizationGraph.GetLocalizedPath(localizationGraphs[0], firstPath); //Get the route of the localization from the first path inforation - var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs); + var psmGlyco = CreateGsm(theScan, scanIndex, ind, theScanBestPeptide, localizationCandidate, oxoniumIonIntensities, localizationGraphs); //Create the glycoSpectralMatch if (psmGlyco.Score > scoreCutOff) { @@ -569,15 +597,26 @@ private List FindNGlycopeptide(Ms2ScanWithSpecificMass theSc return possibleMatches; } - // This function conduct the search and generate the glyco search match spectrum (gsms). - // The search is the modern search to check each possible peptide candidate. + + // Match the mass of the peptide candiate with the precursor mass, then try to generate the gsms object as output + /// + /// This is a general function for gsm generating. It was operated after the Modern Search. + /// Two Step: + /// (1) Match the mass of the peptide candiate with the precursor mass, then decide to go to which function to generate the gsms object. 
+ /// (2) Catch the gsms object and store it into the possibleMatches then return. + /// + /// The MS2 Scan + /// The peptide candidate from the modern Search + /// + /// + /// The Gsms collection. private List FindOGlycopeptideHashLocal(Ms2ScanWithSpecificMass theScan, List idsOfPeptidesPossiblyObserved, int scanIndex, int scoreCutOff) { List possibleMatches = new List(); for (int ind = 0; ind < idsOfPeptidesPossiblyObserved.Count; ind++) { - var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; + var theScanBestPeptide = PeptideIndex[idsOfPeptidesPossiblyObserved[ind]]; // Get the peptide from the candidate list. if (PrecusorSearchMode.Within(theScan.PrecursorMass, theScanBestPeptide.MonoisotopicMass)) // If the peptide mass is indentical to the precursor mass (or within the tolerance), we can directly search the glycopeptide. { diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs index f35e23131..c6f5ec129 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycoSpectralMatch.cs @@ -47,9 +47,15 @@ public GlycoSpectralMatch(PeptideWithSetModifications theBestPeptide, int notch, public double R138vs144 { get; set; } // The intensity ratio of this 138 and 144 could be a signature for O-glycan or N-glycan. public List> LocalizedGlycan { get; set; } // All seen glycans identified. - public LocalizationLevel LocalizationLevel { get; set; } + public LocalizationLevel LocalizationLevel { get; set; } //Motif should be writen with required form + /// + /// Try to get the ModSite in the right format. + /// + /// full peptide sequence ex. "PTLFKNVSLYK" + /// modificatino AA ex. 
"S","T" + /// int[], the Modpositon index list ex.[9,3] public static List GetPossibleModSites(PeptideWithSetModifications peptide, string[] motifs) { List possibleModSites = new List(); @@ -58,14 +64,14 @@ public static List GetPossibleModSites(PeptideWithSetModifications peptide, foreach (var mtf in motifs) { - if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif)) + if (ModificationMotif.TryGetMotif(mtf, out ModificationMotif aMotif)) //Check if the motif is valid, and creat the motif object from the string. { - Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere."); + Modification modWithMotif = new Modification(_target: aMotif, _locationRestriction: "Anywhere."); modifications.Add(modWithMotif); } } - foreach (var modWithMotif in modifications) + foreach (var modWithMotif in modifications) //interate through all the modifications with motif. { for (int r = 0; r < peptide.Length; r++) { @@ -113,7 +119,11 @@ public static bool MotifExist(string baseSeq, string[] motifs) return false; } - public static string GetTabSepHeaderSingle() //Most complicate part in this class, writing function to input the outcome into the excel file + /// + /// Generate the peptide header, ex File name, Precursor m/z, Score… + /// + /// + public static string GetTabSepHeaderSingle() //Most complicate part in this class { var sb = new StringBuilder(); sb.Append("File Name" + '\t'); @@ -151,6 +161,10 @@ public static string GetTabSepHeaderSingle() //Most complicate part in this clas return sb.ToString(); } + /// + /// Generate the glyco header ex Localization Score, Yion Score… + /// + /// public static string GetTabSeperatedHeaderGlyco() { var sb = new StringBuilder(); @@ -174,6 +188,10 @@ public static string GetTabSeperatedHeaderGlyco() return sb.ToString(); } + /// + /// Put the psm data into the corresponding columns. 
+ /// + /// public string SingleToString() { var sb = new StringBuilder(); @@ -188,7 +206,7 @@ public string SingleToString() var proteinAccessionString = Accession ?? PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(p => p.Peptide.Parent.Accession), FullSequence).ResolvedString; sb.Append(proteinAccessionString + "\t"); sb.Append(Organism + "\t"); - sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t"); + sb.Append(PsmTsvWriter.Resolve(BestMatchingBioPolymersWithSetMods.Select(b => b.Peptide.Parent.FullName), FullSequence).ResolvedString + "\t"); //protein name int _FirstOneBasedStartResidueInProtein = OneBasedStartResidue.HasValue ? OneBasedStartResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedStartResidue; int _FirstOneBasedEndResidueInProtein = OneBasedEndResidue.HasValue ? OneBasedEndResidue.Value : BestMatchingBioPolymersWithSetMods.First().Peptide.OneBasedEndResidue; ; @@ -257,7 +275,10 @@ public string SingleToString() return sb.ToString(); } - //This should be appended to SingleToString + /// + /// Put the glycan data into the corresponding columns. + /// + /// public string GlycoToString() { var sb = new StringBuilder(); @@ -291,11 +312,11 @@ public string GlycoToString() for (int i = 0; i < glycanBox.NumberOfMods; i++) { glycans[i] = GlycanBox.GlobalOGlycans[glycanBox.ModIds[i]]; - } + } //Convert the glycanBox index into the real glycan object. ex. [H1N1, H2N2A1, H2N2A1F1] if (glycans.First().Struc != null) { - sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray())); + sb.Append(string.Join(",", glycans.Select(p => p.Struc.ToString()).ToArray())); //ex. (N(H)),(N(H(A))(N(H))),(N(H)(N(H(A))(F)) } sb.Append("\t"); @@ -357,39 +378,45 @@ public static Dictionary MatchedIonDataDictionary(List ; Input: List - // example: {18, 1, Ture}, means the 18th glycan is localized on the 1st position of the peptide. 
+ + /// + /// Two function included: + /// (1) Analysis all pair, and evaluate any site is occured in all cases, if yes set a true on that. If not, set a false. + /// (2) Classify the localization level base on the localization. + /// + /// all case of the pair + /// level 1 to level 3 + /// A tuple, represent the pair and its confidience ex. [3,5,ture] means glycan 5 located on glycosite 3, and very confidience public static List> GetLocalizedGlycan(List OGlycanBoxLocalization, out LocalizationLevel localizationLevel) { List> localizedGlycan = new List>(); - //Dictionary: modsite-id, count - Dictionary seenModSite = new Dictionary(); + Dictionary ModSiteSeenCount = new Dictionary(); // all possible glycan-sites pair, Dictionary: site-glycan pair, count - foreach (var ogl in OGlycanBoxLocalization) + foreach (var ogl in OGlycanBoxLocalization) // ogl means one case, there are three glycan located on the same peptide: (5,1,False),(9,8,Flase),(10,9,Ture) { - foreach (var og in ogl.Mods) + foreach (var og in ogl.Mods) // og means one glycan locaization, like (5,1,False) -> glycan 1 attached on postion5. { - var k = og.Item1.ToString() + "-" + og.Item2.ToString(); - if (seenModSite.ContainsKey(k)) + var k = og.Item1.ToString() + "-" + og.Item2.ToString(); // k = 5-1(glycosite-glycan) means the glycan-site pair + if (ModSiteSeenCount.ContainsKey(k)) // accout the number of the same glycan-site pair { - seenModSite[k] += 1; + ModSiteSeenCount[k] += 1; // this pair cpunt +1 } else { - seenModSite.Add(k, 1); + ModSiteSeenCount.Add(k, 1); // If the pair is first time to seen, add it to the dictionary. 
} } } localizationLevel = LocalizationLevel.Level3; - if (OGlycanBoxLocalization.Count == 1) + if (OGlycanBoxLocalization.Count == 1) // we just have one situation(route), no other possibility { localizationLevel = LocalizationLevel.Level1; } else if (OGlycanBoxLocalization.Count > 1) { - if (seenModSite.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0) + if (ModSiteSeenCount.Values.Where(p => p == OGlycanBoxLocalization.Count).Count() > 0) //If anyone of the glycan-site pair is localized in all the cases, then the localization level is 2. { localizationLevel = LocalizationLevel.Level2; } @@ -399,9 +426,9 @@ public static List> GetLocalizedGlycan(List OGlycan } } - foreach (var seenMod in seenModSite) + foreach (var seenMod in ModSiteSeenCount) { - if (seenMod.Value == OGlycanBoxLocalization.Count) + if (seenMod.Value == OGlycanBoxLocalization.Count) // Try to fine the glycan-site pair that always localized in all the cases. { localizedGlycan.Add(new Tuple(int.Parse(seenMod.Key.Split('-')[0]), int.Parse(seenMod.Key.Split('-')[1]), true)); } @@ -414,6 +441,11 @@ public static List> GetLocalizedGlycan(List OGlycan return localizedGlycan; } + /// + /// convert the Route information into the string format. + /// + /// Route collection ex. [(9,4),(8,4),(7,4)...], ModBoxId = 7 + /// string {@7[8-4]}{@7[7-4]}{@7[6-4]} means three case, glycan 4 located on glycosite 6, glycan 4 located on glycosite 7, glycan 4 located on glycosite 8 public static string AllLocalizationInfo(List OGlycanBoxLocalization) { string local = ""; @@ -434,7 +466,7 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization) { var ogl = OGlycanBoxLocalization[i]; local += "{@" + ogl.ModBoxId.ToString() + "["; - var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString())); + var g = string.Join(",", ogl.Mods.Select(p => (p.Item1 - 1).ToString() + "-" + p.Item2.ToString())); //why we have to -1 here? 
local += g + "]}"; i++; } @@ -447,7 +479,15 @@ public static string AllLocalizationInfo(List OGlycanBoxLocalization) return local; } - //Correct Localization Level based on site specific probability. If LocalizationLevel = 1, and there are site probability lower than 0.75, Correct the level to 1b. + /// + /// Just for the case at Level1 and Level1b. + /// + /// + /// + /// + /// + /// + /// level 1 or level 1b public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, LocalizationGraph localizationGraph, Route route, List> localizedGlycan, LocalizationLevel localizationLevel) { if (siteSpeciLocalProb == null || localizationLevel!=LocalizationLevel.Level1) @@ -469,7 +509,7 @@ public static LocalizationLevel CorrectLocalizationLevel(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local, ref string local_protein) + /// + /// Output the special localization information. String store in Local_peptide and Local_protein. ex. [9,H2N2A1F1,0.589] means glycan H2N2A1F1 located on glycosite 9 with 0.589 probability. + /// + /// site : (glycan, probility)[] ex. site2 : [(glycan1, 5%), (glycan2, 5%), (glycan3, 90%)] + /// [(6,4,false),(7,4,false),(7,2,true)], glycosite,glycan,confidience respectively + /// + /// + /// + public static void LocalizedSiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb, List> localizedGlycan, int? OneBasedStartResidueInProtein, ref string local_peptide, ref string local_protein) { if (siteSpeciLocalProb == null) { return; } - foreach (var loc in localizedGlycan.Where(p => p.Item3)) + foreach (var glycositePair in localizedGlycan.Where(p => p.Item3)) // get the most confidient glycosite-glycan pair, loc is a pair of glycosite and glycan. Item 1 is glycosite, Item 2 is glycanId. 
{ - var x = siteSpeciLocalProb[loc.Item1].Where(p => p.Item1 == loc.Item2).First().Item2; - var peptide_site = loc.Item1 - 1; - local += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]"; + var site_glycanProb = siteSpeciLocalProb[glycositePair.Item1].Where(p => p.Item1 == glycositePair.Item2).First().Item2; // get the probability of the specfic glycan on the specific site. + var peptide_site = glycositePair.Item1 - 1; + local_peptide += "[" + peptide_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]"; - var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + loc.Item1 - 2 : -1; - local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[loc.Item2].Composition + "," + x.ToString("0.000") + "]"; + var protein_site = OneBasedStartResidueInProtein.HasValue ? OneBasedStartResidueInProtein.Value + glycositePair.Item1 - 2 : -1; + local_protein += "[" + protein_site + "," + GlycanBox.GlobalOGlycans[glycositePair.Item2].Composition + "," + site_glycanProb.ToString("0.000") + "]"; } } + + /// + /// Generate the site specific localization information. + /// + /// + /// Site specific localization information. ex. {1[1,0.2][2,0.8]} means glycan 1 and 2 are located on glycosite 1 and 2 with 20% and 80% probability. 
public static string SiteSpeciLocalInfo(Dictionary>> siteSpeciLocalProb) { string local = ""; diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs index 3d56c5cd0..d68eeafd2 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/LocalizationGraph.cs @@ -17,8 +17,8 @@ public class LocalizationGraph public ModBox ModBox { get; } public ModBox[] ChildModBoxes { get; set; } - public double NoLocalCost{get; set;} //Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here. - public double TotalScore { get; set; } //Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions. + public double NoLocalCost{get; set;} // Note that we have node for each glycosite, the matched ions before the first node and after the last node is scored here. + public double TotalScore { get; set; } // Total score is the score of matched ions that are used for localization. For O-glycan, it is the score of all matched c/zDot ions. public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, int id) { @@ -36,7 +36,13 @@ public LocalizationGraph(int[] modPos, ModBox modBox, ModBox[] childModBoxes, in } //The modification problem is turned into a Directed Acyclic Graph. The Graph was build with matrix, and dynamic programming is used. - //The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode. + /// + /// The function goes through the AdjNode[][] array from left to right, assign weight to each AdjNode, keep track of the heaviest previous AdjNode. 
+ /// + /// The space to store the data + /// The MS2 scan + /// + /// public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List products) { var boxSatisfyBox = BoxSatisfyBox(localizationGraph.ChildModBoxes); @@ -44,17 +50,17 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW for (int i = 0; i < localizationGraph.ModPos.Length; i++) { //maxLength: the most mods we can have up to current mod pos; minlengtt: the least mods we can have up to current mod pos. - int maxLength = i + 1; - int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i); - + int maxLength = i + 1; //For the first node, the maxlength is 1. Means we max have one glycan in this positioin. + int minlength = localizationGraph.ModBox.ModIds.Length - (localizationGraph.ModPos.Length - 1 - i); //In order to get min number, the min = number of glycan in the box - number of node from the last. + // Total 3 glycan in the box, end position is 7, then for position 5, the min = 3 - (7-5) = 1. for (int j = 0; j < localizationGraph.ChildModBoxes.Length; j++) { if (localizationGraph.ChildModBoxes[j].NumberOfMods <= maxLength && localizationGraph.ChildModBoxes[j].NumberOfMods >= minlength) { - AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]); + AdjNode adjNode = new AdjNode(i, j, localizationGraph.ModPos[i], localizationGraph.ChildModBoxes[j]); //chekc the num of glycan in this node is make sense. double cost = 0; - if (i != localizationGraph.ModPos.Length - 1) + if (i != localizationGraph.ModPos.Length - 1) // check the node is not the last one. 
{ var fragments = GlycoPeptides.GetLocalFragment(products, localizationGraph.ModPos, i, localizationGraph.ModBox, localizationGraph.ChildModBoxes[j]); cost = CalculateCost(theScan, productTolerance, fragments); @@ -77,7 +83,7 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW { adjNode.AllSources.Add(prej); - var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost; + var tempCost = cost + localizationGraph.array[i - 1][prej].maxCost; //Try to get the max cost from previous AdjNode. if (tempCost > maxCost) { adjNode.CummulativeSources.Clear(); @@ -110,7 +116,13 @@ public static void LocalizeOGlycan(LocalizationGraph localizationGraph, Ms2ScanW localizationGraph.TotalScore = localizationGraph.array[localizationGraph.ModPos.Length - 1][localizationGraph.ChildModBoxes.Length - 1].maxCost + noLocalScore; } - //Based on our implementation of Graph localization. We need to calculate cost between two nearby nodes (glycosites) + /// + /// Calculate the cost/Score of the Scan. + /// + /// + /// + /// + /// The Score public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance productTolerance, List fragments) { double score = 0; @@ -128,7 +140,12 @@ public static double CalculateCost(Ms2ScanWithSpecificMass theScan, Tolerance pr return score; } - //Check if array1 contains array2 with repeats numbers. + /// + /// Check does the node1 contain everything in another node2? + /// + /// + /// + /// Ture, False private static bool TryGetLeft(int[] array1, int[] array2) { //Get compliment box @@ -148,9 +165,12 @@ private static bool TryGetLeft(int[] array1, int[] array2) return true; } - //The Directed Acyclic Graph is build from left to right. In the process, we need to know which node can linked to nodes from its left. - //Since node contains Childbox. We name this function as BoxSatisfyBox. - //The function defines how a childBox could be linked from all childBoxes. 
+ + /// + /// Build a chart for the node connection rule. Used the chart to check if the next node could be linked to the previous node. + /// + /// + /// Chart (one column is previous, one column is current, the value is boolean) public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) { Dictionary boxIdBoxes = new Dictionary(); @@ -160,7 +180,7 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) for (int j = 0; j <= i; j++) { if (childBoxes[i].NumberOfMods <= childBoxes[j].NumberOfMods + 1 && (childBoxes[j].NumberOfMods ==0 || TryGetLeft(childBoxes[i].ModIds, childBoxes[j].ModIds))) - { + { //Check the next node could be the same or one more mod than the previous node. Besdies, the next node should contain all mods that the previous node has. idBoxes[j] = true; } } @@ -170,8 +190,13 @@ public static Dictionary BoxSatisfyBox(ModBox[] childBoxes) return boxIdBoxes; } - //Get all path with hightest score of Directed Acyclic Graph by recursion. - //Start from the last AdjNode[row-1 ][col-1], go back to it Sources, which contains the previous AdjNode with the highest cost. + + /// + /// Try to ll the highest score path in the graph. Start from the last AdjNode[row-1 ][col-1], go back to it Sources, which contains the previous AdjNode with the highest cost. + /// + /// + /// + /// The path (one or more) with the higgest Score public static List GetAllHighestScorePaths(AdjNode[][] array, ModBox[] boxes) { List allPaths = new List(); @@ -207,7 +232,12 @@ private static void GetAllHighestScorePathHelper(List allPaths, AdjNode[] } } - //Get one path of Directed Acyclic Graph by recursion. + /// + /// Get The toppest position path of in the localGraph by recursion Method. 
+ /// + /// + /// + /// public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) { @@ -216,7 +246,7 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) int[] temp = new int[xlength]; - temp[xlength - 1] = ylength - 1; + temp[xlength - 1] = ylength - 1; // That is the last node in the graph, position is last one, and the childBpx is also the last one means the whole glycan. FirstPathHelper(array, xlength - 1, ylength - 1, temp); @@ -225,26 +255,29 @@ public static int[] GetFirstPath(AdjNode[][] array, ModBox[] boxes) private static void FirstPathHelper(AdjNode[][] array, int xind, int yind, int[] temp) { - if (xind == 0) + if (xind == 0) //xind = 0 means, there is just one glycosite. So the node must be the last one in the childBox = whole glycan. { - return; + return; // temp[0] = last one in the childBox = length-1. } - var pre = array[xind][yind].CummulativeSources.First(); + var pre = array[xind][yind].CummulativeSources.First(); // The first one in the CummulativeSources is the toppest previous node. xind--; yind = pre; temp[xind] = yind; FirstPathHelper(array, xind, yind, temp); } - //The original path we get is just an array of AdjNode positions. For example, path = [1, 1, 2, 2] means the best nodes are at array[0][1], array[1][1], array[2][2], array[3][2] - //This function here is to transfer the path into localized Route. Route contains each glycosite with glycanId. - //Basicly, any change from left to right of the path indicates a modification. For example, the path = [1, 1, 2, 2] which means there is a modification at ModPos[0] and ModPos[2] + /// + /// Convert the path inforation into Route object. 
+ /// + /// + /// ex.[1,1,2,2,5] means the node in the localGraph, first node is ModBox1...last Node is modBox5 + /// Route object, present in glycosite-glycan pait format public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] path) { Route route = new Route(); - if (path.Length == 1) + if (path.Length == 1) //If there is only one number in the path, we will assined "the first glycan in the childBox" to the glycosite. { bool onlyOneLocalized = false; if (localizationGraph.TotalScore > 0) @@ -255,7 +288,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] return route; } - //Add first mod. If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox. + //Add first mod in the first glycosite. + //If the childBoxes[path[0]].ModIds.Count == 0, means this is an empty childBox. //Otherwise childBoxes[path[0]].ModIds.Count == 1 and childBoxes[path[0]].ModIds only contains one ModId. if (localizationGraph.ChildModBoxes[path[0]].ModIds.Count() != 0) { @@ -264,7 +298,8 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] for (int i = 1; i < path.Length; i++) { - //If there is a change of the path, get the difference between the two Adjnodes of the array. + // If there is a change of the path, get the difference between the two Adjnodes of the array. + // If the node is the same childBox as the previous node. That means there is no modification at this glycosite. We can move on to the next glycosite. if (path[i] != path[i - 1]) { var left = GetLeft(localizationGraph.array[i][path[i]].ModBox.ModIds, localizationGraph.array[i - 1][path[i - 1]].ModBox.ModIds).First(); @@ -277,7 +312,13 @@ public static Route GetLocalizedPath(LocalizationGraph localizationGraph, int[] return route; } - //Get the difference between array 1 and array 2 with repeat numbers. + + /// + /// Get the difference in glycan between two node. + /// + /// The composition in this node. Ex. 
(0,0,1,2) means the cumulative glycoBox is composed of glycan0 + glycan0 + glycan 1 + glycan 2 + /// + /// The difference of the glycan composition between the two node. public static int[] GetLeft(int[] array1, int[] array2) { //Get compliment box @@ -340,13 +381,19 @@ private static void PathHelper_CalP(List allPaths, LocalizationGraph loca } //Dictionary>> is > + /// + /// Generate the localization probability chart for each glycosite. + /// + /// + /// + /// A dictionary represent the chart for glycosite Probility. Ex. key = 2 (ModPos), [(0,0.1),(1,0.3),(2,0.6)] means glycan 0 is 10 %, glycan 1 is 30%, glycan 2 is 60% public static Dictionary>> CalSiteSpecificLocalizationProbability(List routes, int[] modPos) { Dictionary>> probabilityMatrix = new Dictionary>>(); Tuple[][] matrix = new Tuple[modPos.Length][]; - for (int i = 0; i < modPos.Length; i++) + for (int i = 0; i < modPos.Length; i++) // There are all localization set in the route, we just try to sort the certain glycosite-glycan pairs into the corresponding glycosite. 
{ matrix[i] = new Tuple[routes.Count]; for (int j = 0; j < routes.Count; j++) diff --git a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs index 137bc00fb..070bb129d 100644 --- a/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs +++ b/MetaMorpheus/EngineLayer/ModernSearch/ModernSearchEngine.cs @@ -400,7 +400,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, double highestMassPeptideToLookFor, List peptideIndex, MassDiffAcceptor massDiffAcceptor, double maxMassThatFragmentIonScoreIsDoubled, DissociationType dissociationType) { // get all theoretical fragments this experimental fragment could be - for (int i = 0; i < binsToSearch.Count; i++) + for (int i = 0; i < binsToSearch.Count; i++) //binsToSearch is the list of fragment in Spectra { List peptideIdsInThisBin = FragmentIndex[binsToSearch[i]]; @@ -410,11 +410,11 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, // get index for highest mass allowed int highestPeptideMassIndex = peptideIdsInThisBin.Count - 1; - if (!Double.IsInfinity(highestMassPeptideToLookFor)) + if (!Double.IsInfinity(highestMassPeptideToLookFor)) //check if the highest mass is infinity { - highestPeptideMassIndex = BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex); + highestPeptideMassIndex = BinarySearchBinForPrecursorIndex(peptideIdsInThisBin, highestMassPeptideToLookFor, peptideIndex); //get index for maximum monoisotopic allowed - for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++) + for (int j = highestPeptideMassIndex; j < peptideIdsInThisBin.Count; j++) //find the highest peptide mass index { int nextId = peptideIdsInThisBin[j]; var nextPep = peptideIndex[nextId]; @@ -432,7 +432,7 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, if (dissociationType == DissociationType.LowCID) { // add intensity for each peptide 
candidate in the scoring table up to the maximum allowed precursor mass - for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) + for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) { int id = peptideIdsInThisBin[j]; @@ -447,14 +447,14 @@ protected void IndexedScoring(List[] FragmentIndex, List binsToSearch, } } else - { - // add +1 score for each peptide candidate in the scoring table up to the maximum allowed precursor mass - for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) + { + // account the peptide index shown in the bin + for (int j = lowestPeptideMassIndex; j <= highestPeptideMassIndex; j++) // iterate through the peptide index in the bin { int id = peptideIdsInThisBin[j]; scoringTable[id]++; - // add possible search results to the hashset of id's + // if the score of the peptide >3 (counts > 3 times), and the mass difference is accepted, add the peptide to the list of peptides possibly observed if (scoringTable[id] == byteScoreCutoff && massDiffAcceptor.Accepts(scanPrecursorMass, peptideIndex[id].MonoisotopicMass) >= 0) { idsOfPeptidesPossiblyObserved.Add(id); diff --git a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs index fb7e83f6a..bc00fe9f8 100644 --- a/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/GlycoSearchTask/PostGlycoSearchAnalysisTask.cs @@ -31,6 +31,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List dbFilenameList, List currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList, List allPsms, CommonParameters commonParameters, GlycoSearchParameters glycoSearchParameters, List proteinList, List variableModifications, List fixedModifications, List localizeableModificationTypes, MyTaskResults MyTaskResults) { + List proteinGroups = null; + if (!Parameters.GlycoSearchParameters.WriteDecoys) { 
allPsms.RemoveAll(b => b.IsDecoy); @@ -44,7 +46,7 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li //This is all psms for all files including glyco- and non-glyco psms. SingleFDRAnalysis(allPSMs, commonParameters, new List { taskId }); - List filteredGsms = allPSMs.Where(p => p.FdrInfo.QValue <= 0.01).ToList(); + List filteredPsms = allPSMs.Where(p => p.FdrInfo.QValue <= 0.01).ToList(); //write individual file results if (Parameters.GlycoSearchParameters.WriteIndividualFiles) @@ -55,28 +57,30 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li Directory.CreateDirectory(individualFileResults); } - foreach (var fileSpecificGSMs in filteredGsms.GroupBy(p => p.FullFilePath)) + + + foreach (var fileSpecificPSMs in filteredPsms.GroupBy(p => p.FullFilePath)) //group by file path, and the path will be the key for the dictionary { - string individualFileFolder = Path.GetFileNameWithoutExtension(fileSpecificGSMs.Key); + string individualFileFolder = Path.GetFileNameWithoutExtension(fileSpecificPSMs.Key); //folder name. string individualFileFolderPath = Path.Combine(individualFileResults, individualFileFolder); if (!Directory.Exists(individualFileFolderPath)) { Directory.CreateDirectory(individualFileFolderPath); } - var fsgList = fileSpecificGSMs.ToList(); + var fspList = fileSpecificPSMs.ToList(); if (Parameters.GlycoSearchParameters.DoParsimony) { - GlycoProteinAnalysis(fsgList, individualFileFolderPath, individualFileFolder); + GlycoProteinAnalysis(fspList, individualFileFolderPath, individualFileFolder); //Creat the proteinGroups file } - foreach (GlycoSpectralMatch gsm in fsgList) //maybe this needs to be the filterd list??? + foreach (GlycoSpectralMatch gsm in fspList) //maybe this needs to be the filterd list??? 
{ gsm.ResolveAllAmbiguities(); } var individualFilePsmsPath = Path.Combine(individualFileFolderPath, individualFileFolder + "_AllPSMs.psmtsv"); - WriteGlycoFile.WritePsmGlycoToTsv(fsgList, individualFilePsmsPath, false);//this is everything, glyco and non-glyco + WriteGlycoFile.WritePsmGlycoToTsv(fspList, individualFilePsmsPath, false);//this is everything, glyco and non-glyco //the individual file AllPSMs was just written. The next method writes only those PSMs that have a glyco mod - DivideGlycoPsmsIntoGroupsWriteToTsv(glycoSearchParameters.GlycoSearchType, fsgList, commonParameters, taskId, individualFileFolderPath, individualFileFolder); + DivideGlycoPsmsIntoGroupsWriteToTsv(glycoSearchParameters.GlycoSearchType, fspList, commonParameters, taskId, individualFileFolderPath, individualFileFolder); } } @@ -84,51 +88,44 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li switch (glycoSearchParameters.GlycoSearchType) { case GlycoSearchType.OGlycanSearch: - var allPsmsOgly = filteredGsms.Where(p => p.Routes != null).ToList(); - if (allPsmsOgly.Any()) + var OglyInAllPsms = filteredPsms.Where(p => p.Routes != null).ToList(); //Try to filter out the non-glyco psms + if (OglyInAllPsms.Any()) // Is there any gsms in the allPsms? 
{ - SingleFDRAnalysis(allPsmsOgly, commonParameters, new List { taskId }); + SingleFDRAnalysis(OglyInAllPsms, commonParameters, new List { taskId }); var writtenFileOGlyco = Path.Combine(OutputFolder + "\\oglyco" + ".psmtsv"); - var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(allPsmsOgly.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); - var seen_oglyco_localization_file = Path.Combine(OutputFolder + "\\seen_oglyco_localization" + ".tsv"); + var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(OglyInAllPsms.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); + var seen_oglyco_localization_file = Path.Combine(OutputFolder + "\\seen_oglyco_localization" + ".tsv"); //generate the localization file WriteGlycoFile.WriteSeenProteinGlycoLocalization(ProteinLevelLocalization, seen_oglyco_localization_file); var protein_oglyco_localization_file = Path.Combine(OutputFolder + "\\protein_oglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_oglyco_localization_file); // Writing the oglyco results to a file and summary text - WriteGlycoFile.WritePsmGlycoToTsv(allPsmsOgly, writtenFileOGlyco, true); //we write this last so localization can be attempted - MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsOgly. 
- Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); - MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsOgly - .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + WriteGlycoFile.WritePsmGlycoToTsv(OglyInAllPsms, writtenFileOGlyco, true); //we write this last so localization can be attempted + } break; case GlycoSearchType.NGlycanSearch: - var allPsmsNgly = filteredGsms.Where(p => p.GlycanScore > 0 && p.Routes == null).ToList(); - if (allPsmsNgly.Any()) + var NglyInAllPsms = filteredPsms.Where(p => p.GlycanScore > 0 && p.Routes == null).ToList(); + if (NglyInAllPsms.Any()) { - SingleFDRAnalysis(allPsmsNgly, commonParameters, new List { taskId }); + SingleFDRAnalysis(NglyInAllPsms, commonParameters, new List { taskId }); var writtenFileNGlyco = Path.Combine(OutputFolder + "\\nglyco" + ".psmtsv"); - var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(allPsmsNgly.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); + var ProteinLevelLocalization = GlycoProteinParsimony.ProteinLevelGlycoParsimony(NglyInAllPsms.Where(p => p.Accession != null && p.OneBasedStartResidue.HasValue).ToList()); var seen_nglyco_localization_file = Path.Combine(OutputFolder + "\\seen_nglyco_localization" + ".tsv"); WriteGlycoFile.WriteSeenProteinGlycoLocalization(ProteinLevelLocalization, seen_nglyco_localization_file); var protein_nglyco_localization_file = Path.Combine(OutputFolder + "\\protein_nglyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_nglyco_localization_file); - WriteGlycoFile.WritePsmGlycoToTsv(allPsmsNgly, writtenFileNGlyco, true); //we write this last so localization can be attempted - MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsNgly. 
//we write the search summary into the Allresult file - Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsContaminant && !p.IsDecoy)); - MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsNgly - .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + WriteGlycoFile.WritePsmGlycoToTsv(NglyInAllPsms, writtenFileNGlyco, true); //we write this last so localization can be attempted } break; case GlycoSearchType.N_O_GlycanSearch: default: - var allPsmsgly = filteredGsms.Where(p => p.GlycanScore > 0).ToList(); + var allPsmsgly = filteredPsms.Where(p => p.GlycanScore > 0).ToList(); if (allPsmsgly.Any()) { SingleFDRAnalysis(allPsmsgly, commonParameters, new List { taskId }); @@ -142,36 +139,32 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li var protein_no_glyco_localization_file = Path.Combine(OutputFolder + "\\protein_no_glyco_localization" + ".tsv"); WriteGlycoFile.WriteProteinGlycoLocalization(ProteinLevelLocalization, protein_no_glyco_localization_file); WriteGlycoFile.WritePsmGlycoToTsv(allPsmsgly, writtenFileNOGlyco, true); //we write this last so localization can be attempted - MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + allPsmsgly. 
- Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); - MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + allPsmsgly - .Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1)); + } break; } if (glycoSearchParameters.DoParsimony) { - GlycoProteinAnalysis(filteredGsms, OutputFolder, null, MyTaskResults);//Do the whole group last so inference is done on the whole group + GlycoProteinAnalysis(filteredPsms, OutputFolder, null, MyTaskResults);//Do the whole group last so inference is done on the whole group } else { - GlycoAccessionAnalysis(filteredGsms, OutputFolder);//Do the whole group last so inference is done on the whole group + GlycoAccessionAnalysis(filteredPsms, OutputFolder);//Do the whole group last so inference is done on the whole group } QuantificationAnalysis(); WriteQuantificationResults(); var writtenFileSingle = Path.Combine(OutputFolder, "AllPSMs.psmtsv"); - WriteGlycoFile.WritePsmGlycoToTsv(filteredGsms, writtenFileSingle, true); - MyTaskResults.AddTaskSummaryText("All target PSMs within 1% FDR: " + filteredGsms. 
- Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); + WriteGlycoFile.WritePsmGlycoToTsv(filteredPsms, writtenFileSingle, true); + if (Parameters.GlycoSearchParameters.WriteSpectrumLibrary) { List spectrumLibrary = new List(); - foreach (var gsm in filteredGsms) + foreach (var gsm in filteredPsms) { spectrumLibrary.Add(new LibrarySpectrum(gsm.FullSequence, gsm.ScanPrecursorMonoisotopicPeakMz, gsm.ScanPrecursorCharge, gsm.MatchedFragmentIons,gsm.ScanRetentionTime,gsm.IsDecoy)); } @@ -179,10 +172,52 @@ public MyTaskResults Run(string OutputFolder, List dbFilenameList, Li } FinishedWritingFile(writtenFileSingle, new List { taskId }); + + WriteSummary(filteredPsms, glycoSearchParameters, MyTaskResults); return MyTaskResults; } + /// + /// Wirte the summary of the glyco search results to the results txt file + /// + /// + /// + /// + /// + private void WriteSummary(List targetPsms, GlycoSearchParameters glycoSearchParameters, MyTaskResults MyTaskResults) + { + var gsms = targetPsms.Where(p => p.Routes != null).ToList(); + var Level1gsms = gsms.Where(p => p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1).ToList(); + MyTaskResults.AddTaskSummaryText("All target PSMs within 1% FDR: " + (targetPsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + (ProteinGroups?. + Count(p => p.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + + switch (glycoSearchParameters.GlycoSearchType) + { + case GlycoSearchType.OGlycanSearch: + MyTaskResults.AddTaskSummaryText("All target O-Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 
0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 O-Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 0)); + break; + case GlycoSearchType.NGlycanSearch: + MyTaskResults.AddTaskSummaryText("All target N-Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 N-Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 0)); + break; + case GlycoSearchType.N_O_GlycanSearch: + MyTaskResults.AddTaskSummaryText("All target Glyco PSMs within 1% FDR: " + (gsms?. + Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant) ?? 0)); + MyTaskResults.AddTaskSummaryText("All target Level 1 Glyco PSMs within 1% FDR: " + (Level1gsms + ?.Count(p => p.FdrInfo.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant && p.LocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1) ?? 0)); + break; + } + + } + private void DivideGlycoPsmsIntoGroupsWriteToTsv(GlycoSearchType glycoSearchType, List gsms, CommonParameters commonParameters, string taskId, string individualFileFolderPath, string individualFileFolder) @@ -190,8 +225,8 @@ private void DivideGlycoPsmsIntoGroupsWriteToTsv(GlycoSearchType glycoSearchType switch (glycoSearchType) { case GlycoSearchType.OGlycanSearch: - var allPsmsOgly = gsms.Where(p => p.Routes != null).ToList(); - if (allPsmsOgly.Any()) + var allPsmsOgly = gsms.Where(p => p.Routes != null).ToList(); + if (allPsmsOgly.Any()) //In the all gsms. 
is there any gsms contain localization informaiton(route) { SingleFDRAnalysis(allPsmsOgly, commonParameters, new List { taskId }); var writtenFileOGlyco = Path.Combine(individualFileFolderPath, individualFileFolder + "oglyco" + ".psmtsv"); @@ -248,7 +283,7 @@ private void SingleFDRAnalysis(List items, CommonParameters new FdrAnalysisEngine(psms, 0, commonParameters, this.FileSpecificParameters, taskIds).Run(); } - private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null) + private void GlycoProteinAnalysis(List gsms, string outputFolder, string individualFileFolder = null, MyTaskResults myTaskResults = null ) { // convert gsms to psms List psmsForProteinParsimony = gsms.Select(p => p as SpectralMatch).ToList(); @@ -266,6 +301,7 @@ private void GlycoProteinAnalysis(List gsms, string outputFo Status("Done constructing protein groups!", Parameters.SearchTaskId); WriteProteinResults(outputFolder, individualFileFolder, myTaskResults); + } private void GlycoAccessionAnalysis(List gsms, string individualFileFolderPath, string individualFileFolder = null) { @@ -309,9 +345,7 @@ private void WriteProteinResults(string outputFolder, string individualFileFolde string fileName = "AllProteinGroups.tsv"; string writtenFile = Path.Combine(outputFolder, individualFileFolder + "_"+ fileName); WriteProteinGroupsToTsv(ProteinGroups, writtenFile, new List { Parameters.SearchTaskId }, qValueCutoff_FORDEBUGONLY); - if (myTaskResults is not null) - myTaskResults.AddTaskSummaryText("All target protein groups within 1% FDR: " + ProteinGroups. 
- Count(p => p.QValue <= 0.01 && !p.IsDecoy && !p.IsContaminant)); + } private void WriteProteinGroupsToTsv(List proteinGroups, string filePath, List nestedIds, double qValueCutoff) diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 7bb97e3f5..8932e0ce4 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -97,7 +97,7 @@ public static void OGlycoTest_GetK() [Test] public static void OGlycoTest_OGlycanChildIons() { - var glycan = GlycanBox.GlobalOGlycans[5]; + var glycan = GlycanBox.GlobalOGlycans[5]; // we use the glycan (N(H)(N(H))) Assert.That(glycan.Ions.Count == 5); @@ -197,13 +197,12 @@ public static void OGlycoTest_FragmentIons2() { //Get glycanBox var glycanBox = OGlycanBoxes[24]; - Protein protein = new Protein("TVYLGASK", ""); var peptide = protein.Digest(new DigestionParams(), new List(), new List()).First(); List modPos = new List { 2, 8 }; - var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(modPos.ToArray(), peptide, OGlycanBoxes[24]); + var peptideWithMod = GlycoPeptides.OGlyGetTheoreticalPeptide(modPos.ToArray(), peptide, glycanBox); Assert.That(peptideWithMod.FullSequence == "T[O-Glycosylation:H1N1 on X]VYLGAS[O-Glycosylation:H1N1A1 on X]K"); var fragments_etd = GlycoPeptides.OGlyGetTheoreticalFragments(DissociationType.ETD, new List(), peptide, peptideWithMod); @@ -507,11 +506,11 @@ public static void OGlycoTest_Run5() int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); //For GlycoPSMs - var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file //For Level1GlycoPSMs - var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + var level1PsmLine = allResultTxtLines.First(p => 
p.Contains("Level 1 O-Glyco PSMs within")); int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file // Parse counted number from csv files @@ -587,11 +586,11 @@ public static void OGlycoTest_Run5_WriteContaminants() int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); //For GlycoPSMs - var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file //For Level1GlycoPSMs - var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file // Parse counted number from csv files @@ -684,11 +683,11 @@ public static void OGlycoTest_Run5_WriteDecoys() // Test writing decoys, and mak int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); //For GlycoPSMs - var glycoPsmLine = allResultTxtLines.First(p => p.Contains("Glyco PSMs within")); + var glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file //For Level1GlycoPSMs - var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 Glyco PSMs within")); + var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file // Parse counted number from csv files From 359154d2056949b9c88c4153d68d30b9fd77181a Mon Sep 17 00:00:00 2001 From: 
RayMSMS Date: Wed, 3 Jul 2024 18:15:17 -0500 Subject: [PATCH 08/13] In order to pass the converage, add the new model in the tester "N-glycan fragment" --- .../EngineLayer/GlycoSearch/GlycanDatabase.cs | 66 +++++++++++-------- MetaMorpheus/Test/TestNGlyco.cs | 6 +- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs index b716fb081..3d7f05e77 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs @@ -128,28 +128,32 @@ public static IEnumerable LoadStructureGlycan(string filePath, bool IsOG //From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py#L408 //The fragment generation is not as good as structure based method. So it is better to use a structure based N-Glycan database. // The function is used to load the database from the different formats, but we don't use it now. - public static List NGlycanCompositionFragments(byte[] kind) + public static List NGlycanCompositionFragments(byte[] kind, bool isfucExtended = false) { int glycan_mass = Glycan.GetMass(kind); - int core_count = 1; + // int core_count = 1; int iteration_count = 0; + int hexnac_Core = 2; + int hexose_Core = 3; bool extended = true; - bool extended_fucosylation = false; + bool extended_fucosylation = isfucExtended; int fuc_count = kind[4]; int xyl_count = kind[9]; - int hexnac_inaggregate = kind[0]; - int hexose_inaggregate = kind[1]; + int hexnac_total = kind[1]; + int hexose_total = kind[0]; List glycanIons = new List(); - int base_hexnac = Math.Min(hexnac_inaggregate + 1, 3); - for (int hexnac_count = 0; hexnac_count < base_hexnac; hexnac_count++) + int base_hexnac = Math.Min(hexnac_total, hexnac_Core); // base_hexnac is the first priority hexnac count, they all come from the core. 
+ for (int hexnac_count = 0; hexnac_count < base_hexnac + 1 ; hexnac_count++) { if (hexnac_count == 0) { - GlycanIon glycanIon = new GlycanIon(null, 8303819, new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, glycan_mass - 8303819); + byte[] startKind = new byte[] { 0, (byte)hexnac_count, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + string glycanName = Glycan.GetKindString(startKind); + GlycanIon glycanIon = new GlycanIon(glycanName, 8303819, startKind, glycan_mass - 8303819); glycanIons.Add(glycanIon); } else if (hexnac_count == 1) @@ -191,7 +195,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -209,22 +213,25 @@ public static List NGlycanCompositionFragments(byte[] kind) } - int min_hexose_inaggregate = Math.Min(hexose_inaggregate + 1, 4); - for (int hexose_count = 1; hexose_count <= min_hexose_inaggregate; hexose_count++) + int base_hexose = Math.Min(hexose_total, hexose_Core); // base_hexose is the first priority hexose count, they all come from the core. 
+ for (int hexose_count = 1; hexose_count <= base_hexose + 1; hexose_count++) { GlycanIon hexose_glycanIon = GenerateGlycanIon((byte)hexose_count, (byte)hexnac_count, 0, 0, glycan_mass); glycanIons.Add(hexose_glycanIon); if (!extended_fucosylation) { - GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); - glycanIons.Add(fuc_glycanIon); - - if (iteration_count < xyl_count) + if (iteration_count < fuc_count) { - GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass); - glycanIons.Add(xyl_fuc_glycanIon); - } + GlycanIon fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); + glycanIons.Add(fuc_glycanIon); + + if (iteration_count < xyl_count) + { + GlycanIon xyl_fuc_glycanIon = ExtendGlycanIon(fuc_glycanIon, 0, 0, 0, 1, glycan_mass); + glycanIons.Add(xyl_fuc_glycanIon); + } + } } else if (fuc_count > 0) { @@ -233,7 +240,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(hexose_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -250,11 +257,11 @@ public static List NGlycanCompositionFragments(byte[] kind) glycanIons.Add(xyl_glycanIon); } - if (hexose_count == 3 && hexnac_count >= 2 * core_count && extended) + if (hexose_count == hexose_Core && hexnac_count >= hexnac_Core && extended) //After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted. 
{ - for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count++) + for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count++) { - if (extra_hexnac_count + hexnac_count > hexnac_inaggregate) + if (extra_hexnac_count + hexnac_count > hexnac_total) { continue; } @@ -283,7 +290,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -302,9 +309,9 @@ public static List NGlycanCompositionFragments(byte[] kind) } - for (int extra_hexose_count = 1; extra_hexose_count < hexose_inaggregate - hexose_count + 1; extra_hexose_count++) + for (int extra_hexose_count = 1; extra_hexose_count < hexose_total - hexose_Core + 1; extra_hexose_count++) { - if (extra_hexose_count + hexose_count > hexose_inaggregate) + if (extra_hexose_count + hexose_count > hexose_total) { continue; } @@ -331,7 +338,7 @@ public static List NGlycanCompositionFragments(byte[] kind) for (int add_fuc_count = 2; add_fuc_count <= fuc_count; add_fuc_count++) { - GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, (byte)add_fuc_count, 0, glycan_mass); + GlycanIon add_fuc_glycanIon = ExtendGlycanIon(new_glycanIon, 0, 0, 1, 0, glycan_mass); glycanIons.Add(add_fuc_glycanIon); } @@ -367,7 +374,9 @@ private static GlycanIon GenerateGlycanIon(byte hexose_count, byte hexnac_count, int ionMass = Glycan.GetMass(ionKind); - GlycanIon glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass); + String glycanName = Glycan.GetKindString(ionKind); + + GlycanIon glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass); return glycanIon; } @@ -381,8 +390,9 @@ 
private static GlycanIon ExtendGlycanIon(GlycanIon glycanIon, byte hexose_count, ionKind[9] += xyl_count; int ionMass = Glycan.GetMass(ionKind); + string glycanName = Glycan.GetKindString(ionKind); - GlycanIon extend_glycanIon = new GlycanIon(null, ionMass, ionKind, glycan_mass - ionMass); + GlycanIon extend_glycanIon = new GlycanIon(glycanName, ionMass, ionKind, glycan_mass - ionMass); return extend_glycanIon; } diff --git a/MetaMorpheus/Test/TestNGlyco.cs b/MetaMorpheus/Test/TestNGlyco.cs index 24e3c6612..c210ad8e6 100644 --- a/MetaMorpheus/Test/TestNGlyco.cs +++ b/MetaMorpheus/Test/TestNGlyco.cs @@ -291,10 +291,12 @@ public static void GlyTest_BinarySearch() [Test] public static void GlyTest_NGlycanCompositionFragments() { - var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)"); + var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); var ions = GlycanDatabase.NGlycanCompositionFragments(kind); + var ions_fucExtended = GlycanDatabase.NGlycanCompositionFragments(kind, true); + Glycan glycan = Glycan.Struct2Glycan("(N(F)(N(H(H)(H(N(F)(H(A)))))))", 0); var ionMass = ions.Select(p => p.IonMass).ToList(); @@ -303,7 +305,7 @@ public static void GlyTest_NGlycanCompositionFragments() var overlap = glycanIonmass.Intersect(ionMass).Count(); - Assert.That(overlap == 13); + Assert.That(overlap == 15); } } From 09965d8b5838e46359c0d635f5ef6a4143254643 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Thu, 4 Jul 2024 11:40:28 -0500 Subject: [PATCH 09/13] Update 7/4/2024 1. 
add new tester model for "OGlycanCompositionFragments" --- .../EngineLayer/GlycoSearch/GlycanDatabase.cs | 20 +++++++++---------- MetaMorpheus/Test/TestNGlyco.cs | 15 ++++++++++---- MetaMorpheus/Test/TestOGlyco.cs | 13 ++++++++++++ 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs index 3d7f05e77..e0856a909 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs @@ -261,7 +261,7 @@ public static List NGlycanCompositionFragments(byte[] kind, bool isfu { for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count++) { - if (extra_hexnac_count + hexnac_count > hexnac_total) + if (extra_hexnac_count + hexnac_count > hexnac_total) // this part is doesn't make sense, because the hexnac_count cannot be larger than total-hexnac { continue; } @@ -311,7 +311,7 @@ public static List NGlycanCompositionFragments(byte[] kind, bool isfu for (int extra_hexose_count = 1; extra_hexose_count < hexose_total - hexose_Core + 1; extra_hexose_count++) { - if (extra_hexose_count + hexose_count > hexose_total) + if (extra_hexose_count + hexose_count > hexose_total) // this part is doesn't make sense, because the hexnac_count cannot be larger than total-hexnac { continue; } @@ -411,12 +411,12 @@ public static List OGlycanCompositionFragments(byte[] kind) bool extended = true; int fuc_count = kind[4]; - int hexnac_inaggregate = kind[0]; - int hexose_inaggregate = kind[1]; + int hexnac_total = kind[1]; + int hexose_total = kind[0]; for (int hexnac_count = 0; hexnac_count < 3; hexnac_count++) { - if (hexnac_inaggregate < hexnac_count) + if (hexnac_total < hexnac_count) { continue; } @@ -437,7 +437,7 @@ public static List OGlycanCompositionFragments(byte[] kind) for (int hexose_count = 0; hexose_count < 2; hexose_count++) { - if (hexose_inaggregate < 
hexose_count) + if (hexose_total < hexose_count) { continue; } @@ -457,9 +457,9 @@ public static List OGlycanCompositionFragments(byte[] kind) // After the core motif has been exhausted, speculatively add on the remaining core monosaccharides sequentially until exhausted. - if (extended && hexnac_inaggregate - hexnac_count >= 0) + if (extended && hexnac_total - hexnac_count >= 0) { - for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_inaggregate - hexnac_count + 1; extra_hexnac_count ++) + for (int extra_hexnac_count = 0; extra_hexnac_count < hexnac_total - hexnac_count + 1; extra_hexnac_count ++) { if (extra_hexnac_count > 0) { @@ -477,9 +477,9 @@ public static List OGlycanCompositionFragments(byte[] kind) } - if (hexose_inaggregate > hexose_count && hexose_count > 0) + if (hexose_total > hexose_count && hexose_count > 0) { - for (int extra_hexose_count = 0; extra_hexose_count < hexose_inaggregate - hexose_count; extra_hexose_count++) + for (int extra_hexose_count = 0; extra_hexose_count < hexose_total - hexose_count; extra_hexose_count++) { if (extra_hexose_count > 0 && extra_hexose_count + hexose_count >0) { diff --git a/MetaMorpheus/Test/TestNGlyco.cs b/MetaMorpheus/Test/TestNGlyco.cs index c210ad8e6..b8c8c940b 100644 --- a/MetaMorpheus/Test/TestNGlyco.cs +++ b/MetaMorpheus/Test/TestNGlyco.cs @@ -291,15 +291,22 @@ public static void GlyTest_BinarySearch() [Test] public static void GlyTest_NGlycanCompositionFragments() { - var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); + var testKind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); - var ions = GlycanDatabase.NGlycanCompositionFragments(kind); + var ions_NotFucExtended = GlycanDatabase.NGlycanCompositionFragments(testKind); - var ions_fucExtended = GlycanDatabase.NGlycanCompositionFragments(kind, true); + var ions_fucExtended = GlycanDatabase.NGlycanCompositionFragments(testKind, true); + + Assert.That(ions_fucExtended.Count >= 
ions_NotFucExtended.Count); + Assert.That(ions_NotFucExtended.Count == 35); + Assert.That(ions_fucExtended.Count == 43); + + + var kind = GlycanDatabase.String2Kind("HexNAc(3)Hex(4)Fuc(2)NeuAc(1)"); Glycan glycan = Glycan.Struct2Glycan("(N(F)(N(H(H)(H(N(F)(H(A)))))))", 0); - var ionMass = ions.Select(p => p.IonMass).ToList(); + var ionMass = ions_NotFucExtended.Select(p => p.IonMass).ToList(); var glycanIonmass = glycan.Ions.Select(p => p.IonMass).ToList(); diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 8932e0ce4..2e7cc5743 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -109,6 +109,19 @@ public static void OGlycoTest_OGlycanChildIons() var coreIons = GlycanDatabase.OGlycanCompositionFragments(kind); Assert.That(coreIons.Count() == 6); + + //The following code is to test the glycan with complex structure, only to pass the converage. + + var testKind = GlycanDatabase.String2Kind("HexNAc(2)Hex(4)Fuc(2)NeuAc(1)Xylose(1)"); + + var testGlycanIons = GlycanDatabase.OGlycanCompositionFragments(testKind); + + + var testKind_smallGlycan = GlycanDatabase.String2Kind("HexNAc(1)"); + + var testGlycanIons_smallGlycan = GlycanDatabase.OGlycanCompositionFragments(testKind_smallGlycan); + + } [Test] From e5f3f491904dac0832f61525b71e7a3c3a1d3367 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Fri, 5 Jul 2024 12:18:27 -0500 Subject: [PATCH 10/13] update 7/5/2024 1. add the tester for writing function, in different search type 2. 
glycoBox tester for decoy glycanBox --- .../EngineLayer/GlycoSearch/GlycanBox.cs | 2 +- MetaMorpheus/Test/TestOGlyco.cs | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs index 845d7bafc..dee35a258 100644 --- a/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs +++ b/MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs @@ -60,7 +60,7 @@ public static IEnumerable BuildOGlycanBoxes(int maxNum, bool buildDec if (buildDecoy) { - GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray()); + GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray(),false); // decoy glycanBox glycanBox_decoy.TargetDecoy = false; glycanBox_decoy.ChildGlycanBoxes = BuildChildOGlycanBoxes(glycanBox_decoy.NumberOfMods, glycanBox_decoy.ModIds, glycanBox_decoy.TargetDecoy).ToArray(); yield return glycanBox_decoy; diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 2e7cc5743..39228218e 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -48,6 +48,46 @@ public static void OGlycoTest_LoadGlycanBox() Assert.AreEqual(OGlycanBoxes.Count(), 454); } + [Test] + public static void OGlycanTest_GetGlycanBox_Decoy() + { + GlycanBox[] OGlycanBoxes = GlycanBox.BuildOGlycanBoxes(3).ToArray(); + Assert.That(OGlycanBoxes.All(p => p.TargetDecoy = true)); + + GlycanBox[] OGlycanBoxes_withDecoys = GlycanBox.BuildOGlycanBoxes(3, true).ToArray(); + var group_target = OGlycanBoxes_withDecoys.GroupBy(p => p.TargetDecoy == true); + var group_decoy = OGlycanBoxes_withDecoys.GroupBy(p => p.TargetDecoy == false); + Assert.That(group_target.Count() == group_decoy.Count()); + + } + + [Test] + public static void GlycoTest_WritingSummary() // In order to test writing function on different search type ex. O-Search, N-search, N-O search, make sure we have the corresponding search Rseult file. 
+ { + string outputFolder_NSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder_NSearch); + + var glycoSearchTask_NSearch = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSearchTaskconfigNGlycoTest_Run.toml"), MetaMorpheusTask.tomlConfig); + + DbForTask db = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P16150.fasta"), false); + string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\2019_09_16_StcEmix_35trig_EThcD25_rep1_9906.mgf"); + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask_NSearch) }, new List { spectraFile }, new List { db }, outputFolder_NSearch).Run(); + + Directory.Delete(outputFolder_NSearch, true); + + + string outputFolder_NOSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); + Directory.CreateDirectory(outputFolder_NOSearch); + + var glycoSearchTask_NOSearch = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSearchTaskconfigN_OGlycoTest_Run.toml"), MetaMorpheusTask.tomlConfig); + + string spectraFile_NOSearch = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\2019_09_16_StcEmix_35trig_EThcD25_rep1_9906.mgf"); + new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask_NOSearch) }, new List { spectraFile_NOSearch }, new List { db }, outputFolder_NOSearch).Run(); + + Directory.Delete(outputFolder_NOSearch, true); + } + + [Test] public static void GlycoSpectralHeader() { @@ -482,6 +522,7 @@ public static void OGlycoTest_Run4() Directory.Delete(outputFolder, true); } + [Test] public static void OGlycoTest_Run5() { From a77cd36059cb7c3f45a25918ca3caf6bd24e9110 Mon Sep 17 00:00:00 2001 From: RayMSMS Date: Thu, 11 Jul 2024 14:03:51 -0500 Subject: [PATCH 11/13] update 7/11/2024 1. 
delete the bin and retry to pass the tester --- .../GlycoSearchTaskconfigNGlycoTest_Run.toml | 65 +++++++++++++++++++ ...GlycoSearchTaskconfigN_OGlycoTest_Run.toml | 65 +++++++++++++++++++ MetaMorpheus/Test/Test.csproj | 6 ++ 3 files changed, 136 insertions(+) create mode 100644 MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml create mode 100644 MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml diff --git a/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml new file mode 100644 index 000000000..ba75b96b2 --- /dev/null +++ b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigNGlycoTest_Run.toml @@ -0,0 +1,65 @@ +TaskType = "GlycoSearch" + +[_glycoSearchParameters] +OGlycanDatabasefile = "OGlycan.gdb" +NGlycanDatabasefile = "NGlycan.gdb" +GlycoSearchType = "NGlycanSearch" +OxoniumIonFilt = true +DecoyType = "Reverse" +GlycoSearchTopNum = 50 +MaximumOGlycanAllowed = 4 +DoParsimony = true +NoOneHitWonders = false +ModPeptidesAreDifferent = false +WriteIndividualFiles = false +WriteDecoys = true +WriteContaminants = true + +[CommonParameters] +TaskDescriptor = "GlycoSearchTask" +MaxThreadsToUsePerFile = 7 +ListOfModsFixed = "Common Fixed\tCarbamidomethyl on C\t\tCommon Fixed\tCarbamidomethyl on U" +ListOfModsVariable = "Common Variable\tOxidation on M" +DoPrecursorDeconvolution = true +UseProvidedPrecursorInfo = true +DeconvolutionIntensityRatio = 3.0 +DeconvolutionMaxAssumedChargeState = 12 +DeconvolutionMassTolerance = "±4.0000 PPM" +TotalPartitions = 1 +ProductMassTolerance = "±20.0000 PPM" +PrecursorMassTolerance = "±10.0000 PPM" +AddCompIons = false +ScoreCutoff = 3.0 +ReportAllAmbiguity = true +NumberOfPeaksToKeepPerWindow = 1000 +MinimumAllowedIntensityRatioToBasePeak = 0.01 +NormalizePeaksAccrossAllWindows = false +TrimMs1Peaks = false +TrimMsMsPeaks = false +UseDeltaScore = false +QValueOutputFilter = 1.0 
+PepQValueOutputFilter = 1.0 +CustomIons = ["c", "zDot"] +AssumeOrphanPeaksAreZ1Fragments = true +MaxHeterozygousVariants = 4 +MinVariantDepth = 1 +AddTruncations = false +DissociationType = "EThcD" +SeparationType = "HPLC" +MS2ChildScanDissociationType = "Unknown" +MS3ChildScanDissociationType = "Unknown" + +[CommonParameters.DigestionParams] +MaxMissedCleavages = 5 +InitiatorMethionineBehavior = "Variable" +MinPeptideLength = 5 +MaxPeptideLength = 60 +MaxModificationIsoforms = 1024 +MaxModsForPeptide = 2 +Protease = "StcE-trypsin" +SearchModeType = "Full" +FragmentationTerminus = "Both" +SpecificProtease = "StcE-trypsin" +GeneratehUnlabeledProteinsForSilac = true +KeepNGlycopeptide = false +KeepOGlycopeptide = false diff --git a/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml new file mode 100644 index 000000000..01bd6c743 --- /dev/null +++ b/MetaMorpheus/Test/GlycoTestData/GlycoSearchTaskconfigN_OGlycoTest_Run.toml @@ -0,0 +1,65 @@ +TaskType = "GlycoSearch" + +[_glycoSearchParameters] +OGlycanDatabasefile = "OGlycan.gdb" +NGlycanDatabasefile = "NGlycan.gdb" +GlycoSearchType = "N_O_GlycanSearch" +OxoniumIonFilt = true +DecoyType = "Reverse" +GlycoSearchTopNum = 50 +MaximumOGlycanAllowed = 4 +DoParsimony = true +NoOneHitWonders = false +ModPeptidesAreDifferent = false +WriteIndividualFiles = false +WriteDecoys = true +WriteContaminants = true + +[CommonParameters] +TaskDescriptor = "GlycoSearchTask" +MaxThreadsToUsePerFile = 7 +ListOfModsFixed = "Common Fixed\tCarbamidomethyl on C\t\tCommon Fixed\tCarbamidomethyl on U" +ListOfModsVariable = "Common Variable\tOxidation on M" +DoPrecursorDeconvolution = true +UseProvidedPrecursorInfo = true +DeconvolutionIntensityRatio = 3.0 +DeconvolutionMaxAssumedChargeState = 12 +DeconvolutionMassTolerance = "±4.0000 PPM" +TotalPartitions = 1 +ProductMassTolerance = "±20.0000 PPM" +PrecursorMassTolerance = "±10.0000 PPM" 
+AddCompIons = false +ScoreCutoff = 3.0 +ReportAllAmbiguity = true +NumberOfPeaksToKeepPerWindow = 1000 +MinimumAllowedIntensityRatioToBasePeak = 0.01 +NormalizePeaksAccrossAllWindows = false +TrimMs1Peaks = false +TrimMsMsPeaks = false +UseDeltaScore = false +QValueOutputFilter = 1.0 +PepQValueOutputFilter = 1.0 +CustomIons = ["c", "zDot"] +AssumeOrphanPeaksAreZ1Fragments = true +MaxHeterozygousVariants = 4 +MinVariantDepth = 1 +AddTruncations = false +DissociationType = "EThcD" +SeparationType = "HPLC" +MS2ChildScanDissociationType = "Unknown" +MS3ChildScanDissociationType = "Unknown" + +[CommonParameters.DigestionParams] +MaxMissedCleavages = 5 +InitiatorMethionineBehavior = "Variable" +MinPeptideLength = 5 +MaxPeptideLength = 60 +MaxModificationIsoforms = 1024 +MaxModsForPeptide = 2 +Protease = "StcE-trypsin" +SearchModeType = "Full" +FragmentationTerminus = "Both" +SpecificProtease = "StcE-trypsin" +GeneratehUnlabeledProteinsForSilac = true +KeepNGlycopeptide = false +KeepOGlycopeptide = false diff --git a/MetaMorpheus/Test/Test.csproj b/MetaMorpheus/Test/Test.csproj index 297abbe99..e6b47164b 100644 --- a/MetaMorpheus/Test/Test.csproj +++ b/MetaMorpheus/Test/Test.csproj @@ -99,6 +99,12 @@ Always + + Always + + + Always + Always From 494c8212f4a2d82caacdd14ee56fc8fcabfd2d04 Mon Sep 17 00:00:00 2001 From: RayMSMS <150720362+RayMSMS@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:04:31 -0500 Subject: [PATCH 12/13] update 8/6/2024 deleted the duplicate tester --- MetaMorpheus/Test/TestOGlyco.cs | 82 --------------------------------- 1 file changed, 82 deletions(-) diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 42466830d..2472e2adf 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -738,88 +738,6 @@ public static void OGlycoTest_Run5_WriteContaminants() Directory.Delete(outputFolder, true); } - [Test] - public static void OGlycoTest_Run5_WriteContaminants() - { - string outputFolder = 
Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); - Directory.CreateDirectory(outputFolder); - - var glycoSearchTask = Toml.ReadFile(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoSnip.toml"), MetaMorpheusTask.tomlConfig); - glycoSearchTask._glycoSearchParameters.WriteContaminants = true; // write contaminants to the output folder - - DbForTask targetDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoProteinFASTA_7proteins.fasta"), false); - DbForTask contaminDbForTask = new(Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\P13987_contaminant.fasta"), true); - string spectraFile = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData\GlycoPepMix_snip.mzML"); - new EverythingRunnerEngine( - new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, - new List { spectraFile }, - new List { targetDbForTask, contaminDbForTask }, - outputFolder).Run(); - - - // TODO: Test output, make sure the values on the results.txt really reflect the number counted in the csv files - // Parse values from results.txt - string resultsTextPath = Directory.GetFiles(outputFolder, "allResults.txt", SearchOption.TopDirectoryOnly) - .FirstOrDefault(); // Try to find the file name "allResults.txt" in the output folder - if (resultsTextPath is null) - Assert.Fail("Results file not found."); - string[] allResultTxtLines = File.ReadAllLines(resultsTextPath); //read all lines from the file - Assert.That(allResultTxtLines.Length > 0); // make sure there are lines in the file - - //For PSMs - var allPsmLine = allResultTxtLines.First(p => p.Contains("target PSMs within")); - int psmCount = int.Parse(allPsmLine.Split(':').Last().Trim()); - - //For ProteinGroups - var proteinGroupLine = allResultTxtLines.First(p => p.Contains("protein groups within")); - int proteinGroupCount = int.Parse(proteinGroupLine.Split(':').Last().Trim()); - - //For GlycoPSMs - var 
glycoPsmLine = allResultTxtLines.First(p => p.Contains("O-Glyco PSMs within")); - int glycoPsmCount = int.Parse(glycoPsmLine.Split(':').Last().Trim()); // read the number of glyco PSMs from the results file - - //For Level1GlycoPSMs - var level1PsmLine = allResultTxtLines.First(p => p.Contains("Level 1 O-Glyco PSMs within")); - int level1Psmcount = int.Parse(level1PsmLine.Split(':').Last().Trim()); // read the number of Level1-PSMs from the results file - - // Parse counted number from csv files - - //For PSMs - var allPsmPath = Path.Combine(outputFolder, "Task", "AllPSMs.psmtsv"); - List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) - .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); - Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message - int readInPsmsCount = onePercentPsms1.Count; - - //For ProteinGroups - var allProteinGroupsPath = Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv"); - string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); - int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) - .Select(line => line.Split('\t')) - .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) - && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" - && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); - - //For GlycoPSMs - string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); - List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects - .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<0.01) - int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs 
number with Fdr<0.01 - Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message - - //For Level1GlycoPSMs - int readInLevel1GlycoPsmCount = onePercentoGlycoPsms.Count(p => p.GlycanLocalizationLevel == EngineLayer.GlycoSearch.LocalizationLevel.Level1); //the level1 gPSMs number - - //Compare the numbers - Assert.That(psmCount, Is.EqualTo(readInPsmsCount)); - Assert.That(proteinGroupCount, Is.EqualTo(readInProteinCount)); - Assert.That(glycoPsmCount, Is.EqualTo(readInGlycoPsmCount)); - Assert.That(level1Psmcount, Is.EqualTo(readInLevel1GlycoPsmCount)); - - - Directory.Delete(outputFolder, true); - } - [Test] public static void OGlycoTest_Run5_WriteDecoys() // Test writing decoys, and make sure we can filter the decoys PSMs { From 74f04d42c3b23079190bb8330182c1930d9c3b6e Mon Sep 17 00:00:00 2001 From: RayMSMS <150720362+RayMSMS@users.noreply.github.com> Date: Tue, 6 Aug 2024 13:37:00 -0500 Subject: [PATCH 13/13] store the code --- MetaMorpheus/Test/TestOGlyco.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/MetaMorpheus/Test/TestOGlyco.cs b/MetaMorpheus/Test/TestOGlyco.cs index 2472e2adf..886ab208a 100644 --- a/MetaMorpheus/Test/TestOGlyco.cs +++ b/MetaMorpheus/Test/TestOGlyco.cs @@ -658,7 +658,7 @@ public static void OGlycoTest_Run5() Directory.Delete(outputFolder, true); } - [Test] + [Test] public static void OGlycoTest_Run5_WriteContaminants() { string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TESTGlycoData"); @@ -673,7 +673,7 @@ public static void OGlycoTest_Run5_WriteContaminants() new EverythingRunnerEngine( new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List { spectraFile }, - new List { targetDbForTask, contaminDbForTask }, + new List { targetDbForTask, contaminDbForTask }, outputFolder).Run(); @@ -707,7 +707,7 @@ public static void OGlycoTest_Run5_WriteContaminants() //For PSMs var allPsmPath = Path.Combine(outputFolder, "Task", 
"AllPSMs.psmtsv"); List onePercentPsms1 = PsmTsvReader.ReadTsv(allPsmPath, out var errors2) - .Where(p => p.QValue <= 0.01).ToList(); + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); Assert.That(errors2.Count == 0);// if we cannot find the file, we will get an error message int readInPsmsCount = onePercentPsms1.Count; @@ -716,12 +716,14 @@ public static void OGlycoTest_Run5_WriteContaminants() string[] proteinGroupHeaders = File.ReadAllLines(allProteinGroupsPath).First().Split("\t"); int readInProteinCount = File.ReadAllLines(Path.Combine(outputFolder, "Task", "_AllProteinGroups.tsv")).Skip(1) .Select(line => line.Split('\t')) - .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) && qVaule < 0.01); + .Count(p => double.TryParse(p[Array.IndexOf(proteinGroupHeaders, "Protein QValue")], out double qVaule) + && qVaule < 0.01 && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "C" + && p[Array.IndexOf(proteinGroupHeaders, "Protein Decoy/Contaminant/Target")] != "D"); //For GlycoPSMs string oGlycoPath = Path.Combine(outputFolder, "Task", "oglyco.psmtsv"); List onePercentoGlycoPsms = PsmTsvReader.ReadTsv(oGlycoPath, out var errors) //load the PSMs data from the "csv file" and bulid the objects - .Where(p => p.QValue <= 0.01).ToList(); // the filtering (Q<0.01) + .Where(p => p.QValue <= 0.01 && p.DecoyContamTarget != "C" && p.DecoyContamTarget != "D").ToList(); // the filtering (Q<0.01) int readInGlycoPsmCount = onePercentoGlycoPsms.Count; // the gPSMs number with Fdr<0.01 Assert.That(errors.Count == 0);// if we cannot find the file, we will get an error message