-
Notifications
You must be signed in to change notification settings - Fork 46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Kdn adding and diagnostic filtering #2383
Changes from 25 commits
62013c0
2e99da2
d1da509
640c503
a0d2377
88ce7b5
9ff4c2f
02bcfce
206fd27
fdd58a3
8b2898a
8a7f48d
aebf0ec
2d7467c
359154d
09965d8
e5f3f49
3952bf1
3b80d8b
7c2c7ef
9a038a1
9d1c68a
ea690b9
f3d2831
ef33bb5
2d5db1b
7019df1
72521e8
48267d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
HexNAc(1) N1 203.07936 | ||
HexNAc(1)Hex(1) H1N1 365.1322 | ||
HexNAc(2) N2 406.15872 | ||
Hex(1)NeuAc(1) H1A1 453.14825 | ||
HexNAc(1)Kdn(1) N1K1 453.14832 | ||
HexNAc(1)NeuAc(1) N1A1 494.1748 | ||
HexNAc(1)Hex(2) H2N1 527.185 | ||
HexNAc(2)Hex(1) H1N2 568.21155 | ||
HexNAc(3) N3 609.2381 | ||
HexNAc(1)Hex(1)NeuAc(1) H1N1A1 656.2276 | ||
HexNAc(2)Kdn(1) N2K1 656.2277 | ||
HexNAc(2)NeuAc(1) N2A1 697.25415 | ||
HexNAc(2)Hex(1)Fuc(1) H1N2F1 714.2695 | ||
HexNAc(2)NeuGc(1) N2G1 713.249 | ||
HexNAc(2)Hex(2) H2N2 730.2644 | ||
HexNAc(3)Fuc(1) N3F1 755.296 | ||
HexNAc(3)Hex(1) H1N3 771.29095 | ||
HexNAc(1)Hex(2)NeuAc(1) H2N1A1 818.2804 | ||
HexNAc(2)Hex(1)Kdn(1) H1N2K1 818.2806 | ||
HexNAc(2)Hex(1)NeuAc(1) H1N2A1 859.30695 | ||
HexNAc(2)Hex(2)Fuc(1) H2N2F1 876.3223 | ||
HexNAc(3)Hex(1)Fuc(1) H1N3F1 917.3488 | ||
HexNAc(3)Hex(2) H2N3 933.34375 | ||
HexNAc(4)Fuc(1) N4F1 958.37537 | ||
HexNAc(1)Hex(1)NeuAc(2) H1N1A2 947.32306 | ||
HexNAc(2)Hex(1)Fuc(1)Kdn(1) H1N2F1K1 964.3384 | ||
HexNAc(1)Hex(2)Fuc(1)NeuAc(1) H2N1A1F1 964.3383 | ||
HexNAc(1)Hex(3)NeuAc(1) H3N1A1 980.33325 | ||
HexNAc(2)Hex(2)Kdn(1) H2N2K1 980.3334 | ||
HexNAc(2)Hex(1)Fuc(1)NeuAc(1) H1N2A1F1 1005.36487 | ||
HexNAc(3)Fuc(1)Kdn(1) N3F1K1 1005.365 | ||
HexNAc(2)Hex(2)NeuAc(1) H2N2A1 1021.35986 | ||
HexNAc(3)Hex(1)Kdn(1) H1N3K1 1021.3599 | ||
HexNAc(3)Hex(1)NeuAc(1) H1N3A1 1062.3864 | ||
HexNAc(4)Kdn(1) N4K1 1062.3865 | ||
HexNAc(3)Hex(2)Fuc(1) H2N3F1 1079.4017 | ||
HexNAc(4)NeuAc(1) N4A1 1103.4128 | ||
HexNAc(1)Hex(2)NeuAc(2) H2N1A2 1109.3759 | ||
HexNAc(2)Hex(1)NeuAc(1)Kdn(1) H1N2A1K1 1109.376 | ||
HexNAc(4)Hex(2) H2N4 1136.4231 | ||
HexNAc(2)Hex(1)NeuAc(2) H1N2A2 1150.4023 | ||
HexNAc(2)Hex(2)Fuc(3) H2N2F3 1168.4381 | ||
HexNAc(2)Hex(2)Fuc(1)NeuAc(1) H2N2A1F1 1167.4177 | ||
HexNAc(3)Hex(1)Fuc(1)Kdn(1) H1N3F1K1 1167.4178 | ||
HexNAc(2)Hex(3)NeuAc(1) H3N2A1 1183.4126 | ||
HexNAc(3)Hex(2)Kdn(1) H2N3K1 1183.4127 | ||
HexNAc(3)Hex(1)Fuc(1)NeuAc(1) H1N3A1F1 1208.4442 | ||
HexNAc(3)Hex(2)Fuc(2) H2N3F2 1225.4596 | ||
HexNAc(3)Hex(2)NeuAc(1) H2N3A1 1224.4392 | ||
HexNAc(4)Hex(1)Kdn(1) H1N4K1 1224.4393 | ||
HexNAc(1)Hex(1)NeuAc(3) H1N1A3 1238.4185 | ||
HexNAc(2)Hex(1)Fuc(1)NeuAc(2) H1N2A2F1 1296.4603 | ||
HexNAc(3)Fuc(1)NeuAc(1)Kdn(1) N3A1F1K1 1296.4604 | ||
HexNAc(2)Hex(2)NeuAc(2) H2N2A2 1312.4552 | ||
HexNAc(3)Hex(1)NeuAc(1)Kdn(1) H1N3A1K1 1312.4553 | ||
HexNAc(3)Hex(1)NeuAc(2) H1N3A2 1353.4818 | ||
HexNAc(4)NeuAc(1)Kdn(1) N4A1K1 1353.4819 | ||
HexNAc(3)Hex(2)Fuc(1)NeuAc(1) H2N3A1F1 1370.4971 | ||
HexNAc(4)Hex(1)Fuc(1)Kdn(1) H1N4F1K1 1370.4971 | ||
HexNAc(4)NeuAc(2) N4A2 1394.5083 | ||
HexNAc(3)Hex(3)NeuAc(1) H3N3A1 1386.492 | ||
HexNAc(4)Hex(2)Kdn(1) H2N4K1 1386.4922 | ||
HexNAc(2)Hex(2)Fuc(1)NeuAc(2) H2N2A2F1 1458.5131 | ||
HexNAc(3)Hex(1)Fuc(1)NeuAc(1)Kdn(1) H1N3A1F1K1 1458.5133 |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does this need to be a separate file, or should it be added to one of the existing .gdb files? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
(N) | ||
(N(H)) | ||
(N(A)) | ||
(N(H)(N)) | ||
(N(H(A))) | ||
(N(N(K))) | ||
(N(H)(N(H))) | ||
(N(H(A))(N)) | ||
(N(H(A))(A)) | ||
(N(H(A))(N(H))) | ||
(N(H)(N(H(A))(F))) | ||
(N(H(A))(N(H(A)))) | ||
(N(H(A))(N(H(A))(F))) |
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,20 +9,25 @@ | |
|
||
namespace EngineLayer | ||
{ | ||
//One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization. | ||
//GlycanBox -- A defined combination of glycans will be considered to modify on one peptide. | ||
//The GlycanBoxMass is the total mass of all glycans on the peptide | ||
|
||
/// <summary> | ||
/// A defined combination of glycans to modify on one peptide. Ex. if we have 3 glycans on one peptide (g1,g2,g3), the GlycanBoxMass is the sum of the three glycans.(glycanBox: [g1,g2,g3]) | ||
/// </summary> | ||
public class GlycanBox:ModBox | ||
{ | ||
public static Glycan[] GlobalOGlycans { get; set; } | ||
public static Glycan[] GlobalOGlycans { get; set; } // The glycan list in the database file | ||
|
||
public GlycanBox[] ChildGlycanBoxes { get; set; } // all possible glycan combinations in the glycanBox | ||
|
||
public static Modification[] GlobalOGlycanModifications { get; set; } | ||
|
||
public static GlycanBox[] OGlycanBoxes { get; set; } | ||
public static GlycanBox[] OGlycanBoxes { get; set; } // all possible glycan boxes | ||
|
||
public byte[] Kind { get; private set; } | ||
|
||
//TO DO: Decoy O-glycan can be created, but the results need to be reasoned. | ||
//public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 }; | ||
private readonly static int[] SugarShift = new int[] | ||
private readonly static int[] SugarShift = new int[] //still unclear about the shift... | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Are we still unclear about this shift? Can you add a comment explaining what these numbers are and how they are used? |
||
{ | ||
7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050, | ||
11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330, | ||
|
@@ -31,7 +36,11 @@ public class GlycanBox:ModBox | |
|
||
}; | ||
|
||
//After O-glycans are read in from database, we build combinations of glycans into GlycanBox. The maxNum is maximum glycans allowed on one peptides. | ||
/// <summary> | ||
/// Use the glycan from database to create all possible combination glycan set into GlycanBox. | ||
/// </summary> | ||
/// <param name="maxNum"> The maxNum is maximum glycans allowed on one peptides </param> | ||
/// <returns> The glycanBox collection, glycanBox[]</returns> | ||
public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum) | ||
{ | ||
return BuildOGlycanBoxes(maxNum, false); | ||
|
@@ -51,7 +60,7 @@ public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum, bool buildDec | |
|
||
if (buildDecoy) | ||
{ | ||
GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray()); | ||
GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray(),false); // decoy glycanBox | ||
glycanBox_decoy.TargetDecoy = false; | ||
glycanBox_decoy.ChildGlycanBoxes = BuildChildOGlycanBoxes(glycanBox_decoy.NumberOfMods, glycanBox_decoy.ModIds, glycanBox_decoy.TargetDecoy).ToArray(); | ||
yield return glycanBox_decoy; | ||
|
@@ -60,8 +69,11 @@ public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum, bool buildDec | |
} | ||
} | ||
|
||
//After O-glycans are read in from database, we transfer the glycans into 'Modification' class type for MetaMorpheus to manipulate sequences. | ||
//In the future we may able to combine the two type together. | ||
/// <summary> | ||
/// Convert the glycans into the Modification type so that MetaMorpheus can manipulate sequences. In the future we may be able to combine the two types. | ||
/// </summary> | ||
/// <param name="globalOGlycans"></param> | ||
/// <returns></returns> | ||
public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGlycans) | ||
{ | ||
Modification[] globalOGlycanModifications = new Modification[globalOGlycans.Length]; | ||
|
@@ -73,20 +85,26 @@ public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGly | |
return globalOGlycanModifications; | ||
} | ||
|
||
//The function here is to build GlycanBoxes used for LocalizationGraph. | ||
//In LocalizationGraph matrix, for each AdjNode, it represent a ChildOGlycanBox here at certain glycosite. | ||
|
||
/// <summary> | ||
/// Generate all possible child/fragment boxes of the specific glycanBox. The child boxes are used for LocalizationGraph. | ||
/// </summary> | ||
/// <param name="maxNum"></param> | ||
/// <param name="glycanIds"> The glycanBox, ex. [0,0,1] means glycan0 + glycan0 + glycan1 </param> | ||
/// <param name="targetDecoy"></param> | ||
/// <returns> The ChildBox collection, ChildBox[] </returns> | ||
public static IEnumerable<GlycanBox> BuildChildOGlycanBoxes(int maxNum, int[] glycanIds, bool targetDecoy = true) | ||
{ | ||
yield return new GlycanBox(new int[0], targetDecoy); | ||
HashSet<string> seen = new HashSet<string>(); | ||
for (int i = 1; i <= maxNum; i++) | ||
{ | ||
foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) | ||
{ | ||
List<int> ids = new List<int>(); | ||
foreach (var id in idCombine) | ||
foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) //get all combinations of glycans on the peptide, ex. we have three glycosite and three glycan maybe on that (A,B,C) | ||
{ //the combination of glycans on the peptide can be (A),(A+B),(A+C),(B+C),(A+B+C) totally six | ||
List<int> ids = new List<int>(); | ||
foreach (var id in idCombine) | ||
{ | ||
ids.Add(glycanIds[id]); | ||
ids.Add(glycanIds[id]); | ||
} | ||
|
||
if (!seen.Contains(string.Join(",", ids.Select(p => p.ToString())))) | ||
|
@@ -102,14 +120,19 @@ public static IEnumerable<GlycanBox> BuildChildOGlycanBoxes(int maxNum, int[] gl | |
} | ||
} | ||
|
||
/// <summary> | ||
/// Constructor of GlycanBox. | ||
/// </summary> | ||
/// <param name="ids"> The glycanBox composition, each number represent one glycan index in the database</param> | ||
/// <param name="targetDecoy"></param> | ||
public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The bool targetDecoy parameter should be renamed to isTarget to make it clear whether true means a target or a decoy. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Corrected it. Thanks. |
||
{ | ||
byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | ||
foreach (var id in ModIds) | ||
byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; | ||
foreach (var id in ModIds) //ModIds is the same as ids. | ||
{ | ||
for (int i = 0; i < kind.Length; i++) | ||
for (int i = 0; i < kind.Length; i++) | ||
{ | ||
kind[i] += GlobalOGlycans[id].Kind[i]; | ||
kind[i] += GlobalOGlycans[id].Kind[i]; //kind is the sum of all glycan Kind in the Box. | ||
} | ||
} | ||
Kind = kind; | ||
|
@@ -125,18 +148,13 @@ public GlycanBox(int[] ids, bool targetDecoy = true):base(ids) | |
Mass = (double)(Glycan.GetMass(Kind) + SugarShift[shiftInd]) / 1E5; | ||
} | ||
} | ||
|
||
public GlycanBox[] ChildGlycanBoxes { get; set; } | ||
|
||
public string GlycanIdString | ||
|
||
public string GlycanIdString // the composition of glycanBox. Example: [1,2,3] means glycan1 + glycan2 + glycan3 are on the peptide. | ||
{ | ||
get | ||
{ | ||
return string.Join(",", ModIds.Select(p => p.ToString())); | ||
} | ||
} | ||
|
||
public byte[] Kind{ get; private set; } | ||
|
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this need to be a separate file or should it be added to one of the existing glyco files?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just added it to the existing glyco files. Glycan_Mods is the default glycan database collection for the user.