Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the comment on glyco-searching and the search summary on the result file #2367

Merged
merged 24 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
62013c0
The lie's comment about the glyco-searching
RayMSMS May 28, 2024
2e99da2
Try to add the search summary information (PSMs, protein group, glyco…
RayMSMS Jun 4, 2024
d1da509
Merge branch 'master' into glyco-search-comment
nbollis Jun 4, 2024
640c503
Merge branch 'master' into glyco-search-comment
nbollis Jun 5, 2024
a0d2377
add the contaminant tester
RayMSMS Jun 5, 2024
88ce7b5
Merge branch 'glyco-search-comment' of https://github.com/RayMSMS/Met…
RayMSMS Jun 5, 2024
9ff4c2f
Delet the unused constructor of Node class to cheat the coverage check
RayMSMS Jun 6, 2024
02bcfce
Fix the Fdr filter (initial: < 0.1, now <= 0.1)
RayMSMS Jun 6, 2024
206fd27
Try to pass the coverage test, add the docoy filtering tester
RayMSMS Jun 10, 2024
fdd58a3
Merge branch 'master' into glyco-search-comment
nbollis Jun 13, 2024
8b2898a
Merge branch 'master' into glyco-search-comment
nbollis Jun 26, 2024
8a7f48d
update 7/2/2024
RayMSMS Jul 2, 2024
aebf0ec
Merge branch 'glyco-search-comment' of https://github.com/RayMSMS/Met…
RayMSMS Jul 2, 2024
359154d
In order to pass the converage, add the new model in the tester "N-gl…
RayMSMS Jul 3, 2024
09965d8
Update 7/4/2024
RayMSMS Jul 4, 2024
e5f3f49
update 7/5/2024
RayMSMS Jul 5, 2024
3952bf1
Merge branch 'master' into glyco-search-comment
RayMSMS Jul 5, 2024
a77cd36
update 7/11/2024
RayMSMS Jul 11, 2024
64f6bc4
Merge branch 'master' into glyco-search-comment
RayMSMS Jul 30, 2024
2ea7f58
merge in master
RayMSMS Aug 6, 2024
494c821
update 8/6/2024
RayMSMS Aug 6, 2024
74f04d4
store the code
RayMSMS Aug 6, 2024
90d6a0f
Merge branch 'master' into glyco-search-comment
nbollis Aug 15, 2024
ec09432
Merge branch 'master' into glyco-search-comment
nbollis Aug 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace EngineLayer.GlycoSearch
{
//the class is for localization graph matrix. Each node in the matrix is represented by AdjNode.
public class AdjNode
{
//AdjNode -> Adjacent node is used to build the graph matrix for localization. Each node in the graph matrix contains Sources, max cost, current cost, etc.
Expand Down
62 changes: 35 additions & 27 deletions MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public GlycanIon(string ionStruct, int ionMass, byte[] ionKind, int lossIonMass)
IonStruct = ionStruct;
IonMass = ionMass;
IonKind = ionKind;
LossIonMass = lossIonMass;
LossIonMass = lossIonMass; // neutral loss mass
}
public string IonStruct { get; set; }
public int IonMass { get; set; }
Expand Down Expand Up @@ -57,12 +57,12 @@ public string Composition
public List<GlycanIon> Ions { get; set; }
public bool Decoy { get; private set; }

public HashSet<int> DiagnosticIons
public HashSet<int> DiagnosticIons //B ions, and there are more ions to set...
{
get
{
{ //kind[] is the sugar type composition of glycan, and each index represent the corresponding sugar type.
HashSet<int> diagnosticIons = new HashSet<int>();
if (Kind[0] >= 1)
if (Kind[0] >= 1) //if the glycan contains at least one Hexose, it has the corresponding diagnostic ions below.
{
diagnosticIons.Add(10902895 - hydrogenAtomMonoisotopicMass);
diagnosticIons.Add(11503951 - hydrogenAtomMonoisotopicMass);
Expand Down Expand Up @@ -134,10 +134,10 @@ public HashSet<int> DiagnosticIons
{"Xylose", new Tuple<char, int>('X', 9) }
};

public readonly static HashSet<int> CommonOxoniumIons = new HashSet<int>
{13805550, 16806607, 18607663, 20408720, 36614002 };
public readonly static HashSet<int> CommonOxoniumIons = new HashSet<int> //The same ions as described above in the diagnostic ions. These are just for the initial matching with the glycan.
{13805550, 16806607, 18607663, 20408720, 36614002 };// some software use the ions to predict verified glycopeptide (pre-filter).

public readonly static int[] AllOxoniumIons = new int[]
public readonly static int[] AllOxoniumIons = new int[] //The same ion as we describe above in the diagnostic ions. We didn't use the ions for matching now.
{10902895, 11503951, 12605550, 12703952, 13805550, 14406607, 16306064, 16806607, 18607663, 20408720, 27409268, 29008759, 29210324, 30809816, 36614002, 65723544, 67323035};

//TrimannosylCore is only useful for N-Glyco peptides.
Expand All @@ -160,14 +160,17 @@ public HashSet<int> DiagnosticIons

#region Glycan Structure manipulation

//There are two ways to represent a glycan in string, one only combination, the other structure.
//The method generate a glycan by read in a glycan structure string from database.
//There are two ways to represent a glycan as a string: one gives only the composition, and the other includes both linkage and composition information.
// first one: HexNAc(2)Hex(5)NeuAc(1)NeuGc(1)Fuc(1)Phospho(1)Sulfo(1)Na(1)Ac(1)Xylose(1), second one: (N(H(A))(N(H(A))(F)))

//The method generates a glycan by reading the glycan structure string from the database.
// input : (N(H(A))(N(H(A))(F))), output: Glycan object.
public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglycan = false)
{
Node node = Struct2Node(theGlycanStruct);
List<Node> nodeIons = GetAllChildrenCombination(node);
int mass = Glycan.GetMass(theGlycanStruct);
byte[] kind = Glycan.GetKind(theGlycanStruct);
Node node = Struct2Node(theGlycanStruct); //Transfer string to tree structure.
List<Node> nodeIons = GetAllChildrenCombination(node); //Get all possible fragmentation/neutral loss of a glycan.
int mass = Glycan.GetMass(theGlycanStruct); //Get glycan mass.
byte[] kind = Glycan.GetKind(theGlycanStruct); //Get glycan composition, which is a byte array, EX. [2, 5, 1, 1, 1, 1, 1, 1, 1, 1].
List<GlycanIon> glycanIons = new List<GlycanIon>();
HashSet<double> ionMasses = new HashSet<double>();
foreach (var aNodeIon in nodeIons)
Expand All @@ -193,25 +196,26 @@ public static Glycan Struct2Glycan(string theGlycanStruct, int id, bool isOglyca
return glycan;
}

//Glycan are represented in tree structures composed of Node. The function here is to transfer a string into connected Node.
//The function here converts a glycan string into tree format. (Glycans are represented as tree structures composed of Nodes.)
//input: (N(H)), output: Node(N, 0) -> left Child = Node(H, 1)
public static Node Struct2Node(string theGlycanStruct)
{
int level = 0;
Node curr = new Node(theGlycanStruct[1], level);
for (int i = 2; i < theGlycanStruct.Length - 1; i++)
Node curr = new Node(theGlycanStruct[1], level);//The first character is always '(', so the second character is the root of the tree. In this case of (N(H)), N is the root.
for (int i = 2; i < theGlycanStruct.Length - 1; i++) //try to extract the following characters.
{
if (theGlycanStruct[i] == '(')
if (theGlycanStruct[i] == '(') //skip the '(' character.
{
continue;
}
if (theGlycanStruct[i] == ')')
if (theGlycanStruct[i] == ')')//when we meet a ')', we need to go back to the father node.
{
curr = curr.Father;
level--;
}
else
else // when we meet a character, we need to decide where to put it in the tree. (putting priority: left -> right side -> middle)
{
level++;
level++; //first, move to the next level.(Deeper level)
if (curr.LeftChild == null)
{
curr.LeftChild = new Node(theGlycanStruct[i], level);
Expand All @@ -233,7 +237,8 @@ public static Node Struct2Node(string theGlycanStruct)
}
}
}
return curr;
return curr; // return the root of the tree.

}

//The function is to generate all possible fragmentation/neutral loss of a glycan, which is a subset of glycan.
Expand Down Expand Up @@ -364,6 +369,7 @@ private static List<Node> GetAllChildrenCombination(Node node)
}

//Node structure to string structure.
// input: Node(N, 0) -> left Child = Node(H, 1), output: (N(H))
private static string Node2Struct(Node node)
{
string output = "";
Expand All @@ -389,7 +395,7 @@ public static int GetIonLossMass(byte[] Kind, byte[] ionKind)

#region Transfer information

private static int GetMass(string structure)
private static int GetMass(string structure) //Get glycan mass by glycan structure string. structure format : (N(H(A))(N(H(A))(F)))
{
int y = CharMassDic['H'] * structure.Count(p => p == 'H') +
CharMassDic['N'] * structure.Count(p => p == 'N') +
Expand All @@ -405,7 +411,7 @@ private static int GetMass(string structure)
return y;
}

public static int GetMass(byte[] kind)
public static int GetMass(byte[] kind) //Get glycan mass by glycan composition. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0]
{
int mass = CharMassDic['H'] * kind[0] +
CharMassDic['N'] * kind[1] +
Expand All @@ -422,7 +428,7 @@ public static int GetMass(byte[] kind)
return mass;
}

public static byte[] GetKind(string structure)
public static byte[] GetKind(string structure) //Get glycan composition by the structure string. structure format : (N(H(A))(N(H(A))(F))), output : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0]
{
var kind = new byte[]
{ Convert.ToByte(structure.Count(p => p == 'H')),
Expand All @@ -439,7 +445,7 @@ public static byte[] GetKind(string structure)
return kind;
}

public static string GetKindString(byte[] Kind)
public static string GetKindString(byte[] Kind)//Get glycan composition by the kind[]. kind format : [2, 2, 2, 0, 1, 0, 0, 0, 0, 0], output is H2N2A2F1.
{
string H = Kind[0]==0 ? "" : "H" + Kind[0].ToString();
string N = Kind[1] == 0 ? "" : "N" + Kind[1].ToString();
Expand Down Expand Up @@ -488,7 +494,7 @@ public static Modification NGlycanToModification(Glycan glycan)
return modification;
}

public static Modification OGlycanToModification(Glycan glycan)
public static Modification OGlycanToModification(Glycan glycan) //try to transfer the glycan object to modification object.
{
//TO THINK: what the neutralLoss for O-Glyco?
Dictionary<DissociationType, List<double>> neutralLosses = new Dictionary<DissociationType, List<double>>();
Expand Down Expand Up @@ -528,6 +534,8 @@ public static IEnumerable<IEnumerable<T>> GetKCombs<T>(IEnumerable<T> list, int
return GetKCombs(list, length - 1).SelectMany(t => list.Where(o => o.CompareTo(t.Last()) > 0), (t1, t2) => t1.Concat(new T[] { t2 }));
}

// Create combinations from the list; repetition is allowed.
// list is the base list of items; length is the number of items in each combination.
public static IEnumerable<IEnumerable<T>> GetKCombsWithRept<T>(IEnumerable<T> list, int length) where T : IComparable
{
if (length == 1) return list.Select(t => new T[] { t });
Expand Down Expand Up @@ -573,7 +581,7 @@ public static bool Equals(Glycan glycan1, Glycan glycan2)
return false;
}

public static Glycan[] BuildTargetDecoyGlycans(IEnumerable<Glycan> glycans)
public static Glycan[] BuildTargetDecoyGlycans(IEnumerable<Glycan> glycans) //Build target-decoy glycans for testing.
{
List<Glycan> allGlycans = new List<Glycan>();

Expand Down
4 changes: 3 additions & 1 deletion MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ namespace EngineLayer
//One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization.
//GlycanBox -- A defined combination of glycans will be considered to modify on one peptide.
//The GlycanBoxMass is the total mass of all glycans on the peptide
//For example, if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3])
//By the way, the GlycanBox will be first step in the search, the parameter (Max glycan num in peptide) will be used to limit the capacity of the list.
public class GlycanBox:ModBox
{
public static Glycan[] GlobalOGlycans { get; set; }
Expand All @@ -22,7 +24,7 @@ public class GlycanBox:ModBox

//TO DO: Decoy O-glycan can be created, but the results need to be reasoned.
//public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 };
private readonly static int[] SugarShift = new int[]
private readonly static int[] SugarShift = new int[] //still unclear about the shift...
{
7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050,
11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330,
Expand Down
23 changes: 13 additions & 10 deletions MetaMorpheus/EngineLayer/GlycoSearch/GlycanDatabase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

namespace EngineLayer
{

public static class GlycanDatabase
// in our database, the N-glycan.gdb should be correct to the new format
public static class GlycanDatabase
{
//Load Glycan. Generally, glycan-ions should be generated for N-Glycopepitdes which produce Y-ions; MS method couldn't produce o-glycan-ions.
//Load Glycan from the database file (located in the Glycan_Mod). Generally, glycan-ions should be generated for N-Glycopeptides which produce Y-ions; MS method couldn't produce o-glycan-ions.
public static IEnumerable<Glycan> LoadGlycan(string filePath, bool ToGenerateIons, bool IsOGlycanSearch)
{
bool isKind = true;
Expand All @@ -18,7 +18,7 @@ public static IEnumerable<Glycan> LoadGlycan(string filePath, bool ToGenerateIon
while(lines.Peek() != -1)
{
string line = lines.ReadLine();
if (!line.Contains("HexNAc"))
if (!line.Contains("HexNAc")) //use the first line to determine the type of glycan database.
{
isKind = false;
}
Expand All @@ -28,11 +28,11 @@ public static IEnumerable<Glycan> LoadGlycan(string filePath, bool ToGenerateIon

if (isKind)
{
return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch);
return LoadKindGlycan(filePath, ToGenerateIons, IsOGlycanSearch); // open the file of the kind format, example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1)
}
else
{
return LoadStructureGlycan(filePath, IsOGlycanSearch);
return LoadStructureGlycan(filePath, IsOGlycanSearch); // open the file of the structure format, example: (N(H(A))(A))
}
}

Expand All @@ -51,9 +51,9 @@ public static IEnumerable<Glycan> LoadKindGlycan(string filePath, bool ToGenerat
continue;
}

var kind = String2Kind(line);
var kind = String2Kind(line); // convert the database string to kind[] format (byte array).

var glycan = new Glycan(kind);
var glycan = new Glycan(kind); // use the kind[] to create a glycan object.
glycan.GlyId = id++;
if (ToGenerateIons)
{
Expand All @@ -71,7 +71,9 @@ public static IEnumerable<Glycan> LoadKindGlycan(string filePath, bool ToGenerat
}
}

public static byte[] String2Kind(string line)
//Convert the string to byte array.
//Input example: HexNAc(2)Hex(5)NeuAc(1)Fuc(1), Output example: [2, 5, 0, 0, 1, 0, 0, 0, 0, 1]
public static byte[] String2Kind(string line)
{
byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
var x = line.Split(new char[] { '(', ')' });
Expand All @@ -94,14 +96,15 @@ public static IEnumerable<Glycan> LoadStructureGlycan(string filePath, bool IsOG
while (glycans.Peek() != -1)
{
string line = glycans.ReadLine();
yield return Glycan.Struct2Glycan(line, id++, IsOGlycan);
yield return Glycan.Struct2Glycan(line, id++, IsOGlycan); // Directly convert the string to Glycan object.
}
}
}

//This function build fragments based on the general core of NGlyco fragments.
//From https://github.com/mobiusklein/glycopeptidepy/structure/fragmentation_strategy/glycan.py#L408
//The fragment generation is not as good as structure based method. So it is better to use a structure based N-Glycan database.
// The function is used to load the database from the different formats, but we don't use it now.
public static List<GlycanIon> NGlycanCompositionFragments(byte[] kind)
{
int glycan_mass = Glycan.GetMass(kind);
Expand Down
4 changes: 2 additions & 2 deletions MetaMorpheus/EngineLayer/GlycoSearch/GlycoPeptides.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

namespace EngineLayer.GlycoSearch
{
public static class GlycoPeptides
{
public static class GlycoPeptides
{ // NOTE(review): this part is not fully understood; the function appears to produce a list of intensities of the oxonium ions for the scan — confirm.
public static double[] ScanOxoniumIonFilter(Ms2ScanWithSpecificMass theScan, MassDiffAcceptor massDiffAcceptor)
{
double[] oxoniumIonsintensities = new double[Glycan.AllOxoniumIons.Length];
Expand Down
Loading