Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Kdn adding and diagnostic filtering #2383

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
62013c0
The lie's comment about the glyco-searching
RayMSMS May 28, 2024
2e99da2
Try to add the search summary information (PSMs, protein group, glyco…
RayMSMS Jun 4, 2024
d1da509
Merge branch 'master' into glyco-search-comment
nbollis Jun 4, 2024
640c503
Merge branch 'master' into glyco-search-comment
nbollis Jun 5, 2024
a0d2377
add the contaminant tester
RayMSMS Jun 5, 2024
88ce7b5
Merge branch 'glyco-search-comment' of https://github.com/RayMSMS/Met…
RayMSMS Jun 5, 2024
9ff4c2f
Delet the unused constructor of Node class to cheat the coverage check
RayMSMS Jun 6, 2024
02bcfce
Fix the Fdr filter (initial: < 0.1, now <= 0.1)
RayMSMS Jun 6, 2024
206fd27
Try to pass the coverage test, add the docoy filtering tester
RayMSMS Jun 10, 2024
fdd58a3
Merge branch 'master' into glyco-search-comment
nbollis Jun 13, 2024
8b2898a
Merge branch 'master' into glyco-search-comment
nbollis Jun 26, 2024
8a7f48d
update 7/2/2024
RayMSMS Jul 2, 2024
aebf0ec
Merge branch 'glyco-search-comment' of https://github.com/RayMSMS/Met…
RayMSMS Jul 2, 2024
2d7467c
Search the new sugar "Kdn"
RayMSMS Jul 3, 2024
359154d
In order to pass the converage, add the new model in the tester "N-gl…
RayMSMS Jul 3, 2024
09965d8
Update 7/4/2024
RayMSMS Jul 4, 2024
e5f3f49
update 7/5/2024
RayMSMS Jul 5, 2024
3952bf1
Merge branch 'master' into glyco-search-comment
RayMSMS Jul 5, 2024
3b80d8b
wrting fuction update and tester
RayMSMS Jul 5, 2024
7c2c7ef
Merge in my comment branch
RayMSMS Jul 5, 2024
9a038a1
update 7/10
RayMSMS Jul 10, 2024
9d1c68a
Merge branch 'master' into Kdn-Adding-and-diagnostic-filtering
RayMSMS Jul 10, 2024
ea690b9
fix the issue: The glycan database need to reload by the tester
RayMSMS Jul 10, 2024
f3d2831
Merge branch 'Kdn-Adding-and-diagnostic-filtering' of https://github.…
RayMSMS Jul 10, 2024
ef33bb5
update 7/11/2024 try to fix the failed test issue
RayMSMS Jul 11, 2024
2d5db1b
update 7/24
RayMSMS Jul 24, 2024
7019df1
Merge branch 'master' into Kdn-Adding-and-diagnostic-filtering
RayMSMS Jul 30, 2024
72521e8
update 7/30
RayMSMS Jul 30, 2024
48267d9
Merge branch 'Kdn-Adding-and-diagnostic-filtering' of https://github.…
RayMSMS Jul 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions MetaMorpheus/EngineLayer/EngineLayer.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@
<None Update="Glycan_Mods\OGlycan\OGlycan.gdb">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Glycan_Mods\OGlycan\OGlycan_withIsobaric.gdb">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Glycan_Mods\OGlycan\Olgycan Database 28 glycans plus Sodium 56 total.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(N)
(N(H))
(N(A))
(N(H)(N))
(N(H(A)))
(N(N(K)))
(N(H)(N(H)))
(N(H(A))(N))
(N(H(A))(A))
(N(H(A))(N(H)))
(N(H)(N(H(A))(F)))
(N(H(A))(N(H(A))))
(N(H(A))(N(H(A))(F)))
1 change: 1 addition & 0 deletions MetaMorpheus/EngineLayer/GlycoSearch/AdjNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

namespace EngineLayer.GlycoSearch
{
//the class is for localization graph matrix. Each node in the matrix is represented by AdjNode.
public class AdjNode
{
//AdjNode -> Adjacent node, used to build the graph matrix for localization. Each node in the graph matrix contains Sources, max cost, current cost, etc.
Expand Down
179 changes: 125 additions & 54 deletions MetaMorpheus/EngineLayer/GlycoSearch/Glycan.cs

Large diffs are not rendered by default.

78 changes: 48 additions & 30 deletions MetaMorpheus/EngineLayer/GlycoSearch/GlycanBox.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,25 @@

namespace EngineLayer
{
//One peptide can have several o-glycans. The combined glycans are grouped as a glycan box. Used for localization.
//GlycanBox -- A defined combination of glycans will be considered to modify on one peptide.
//The GlycanBoxMass is the total mass of all glycans on the peptide

/// <summary>
/// A defined combination of glycans that modify one peptide. Example: if one peptide carries 3 glycans (g1, g2, g3), the glycanBox is [g1, g2, g3] and the GlycanBoxMass is the sum of the three glycan masses.
/// </summary>
public class GlycanBox:ModBox
{
public static Glycan[] GlobalOGlycans { get; set; }
public static Glycan[] GlobalOGlycans { get; set; } // The glycan list in the database file

public GlycanBox[] ChildGlycanBoxes { get; set; } // all possible glycan combinations in the glycanBox

public static Modification[] GlobalOGlycanModifications { get; set; }

public static GlycanBox[] OGlycanBoxes { get; set; }
public static GlycanBox[] OGlycanBoxes { get; set; } // all possible glycan boxes

public byte[] Kind { get; private set; }

//TO DO: Decoy O-glycan can be created, but the results need to be reasoned.
//public static int[] SugarShift = new int[]{ -16205282, -20307937, -29109542, -14605791, -30709033, -15005282, -36513219, -40615874, 16205282, 20307937, 29109542, 14605791, 30709033, 15005282, 36513219, 40615874 };
private readonly static int[] SugarShift = new int[]
private readonly static int[] SugarShift = new int[] //still unclear about the shift...
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are we still unclear about this shift? Can you add a comment about what these numbers are and how they are used?

{
7103710, 10300920, 11502690, 12904260, 14706840, 5702150, 13705890, 12809500, 11308410, 13104050,
11404290, 9705280, 12805860, 15610110, 8703200, 10104770, 9906840, 18607930, 16306330,
Expand All @@ -31,7 +36,11 @@ public class GlycanBox:ModBox

};

//After O-glycans are read in from database, we build combinations of glycans into GlycanBox. The maxNum is maximum glycans allowed on one peptides.
/// <summary>
/// Use the glycans read from the database to build every possible combination of glycans, each combination stored as a GlycanBox.
/// </summary>
/// <param name="maxNum"> The maximum number of glycans allowed on one peptide </param>
/// <returns> The collection of glycan boxes (a sequence of GlycanBox) </returns>
public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum)
{
return BuildOGlycanBoxes(maxNum, false);
Expand All @@ -51,7 +60,7 @@ public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum, bool buildDec

if (buildDecoy)
{
GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray());
GlycanBox glycanBox_decoy = new GlycanBox(idCombine.ToArray(),false); // decoy glycanBox
glycanBox_decoy.TargetDecoy = false;
glycanBox_decoy.ChildGlycanBoxes = BuildChildOGlycanBoxes(glycanBox_decoy.NumberOfMods, glycanBox_decoy.ModIds, glycanBox_decoy.TargetDecoy).ToArray();
yield return glycanBox_decoy;
Expand All @@ -60,8 +69,11 @@ public static IEnumerable<GlycanBox> BuildOGlycanBoxes(int maxNum, bool buildDec
}
}

//After O-glycans are read in from database, we transfer the glycans into 'Modification' class type for MetaMorpheus to manipulate sequences.
//In the future we may able to combine the two type together.
/// <summary>
/// Convert the glycans into the Modification type so that MetaMorpheus can manipulate sequences. In the future we may be able to combine the two types.
/// </summary>
/// <param name="globalOGlycans"></param>
/// <returns></returns>
public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGlycans)
{
Modification[] globalOGlycanModifications = new Modification[globalOGlycans.Length];
Expand All @@ -73,20 +85,26 @@ public static Modification[] BuildGlobalOGlycanModifications(Glycan[] globalOGly
return globalOGlycanModifications;
}

//The function here is to build GlycanBoxes used for LocalizationGraph.
//In LocalizationGraph matrix, for each AdjNode, it represent a ChildOGlycanBox here at certain glycosite.

/// <summary>
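/// Generate all possible child (fragment) boxes of a specific glycanBox. The child boxes are used for the LocalizationGraph.
/// </summary>
/// <param name="maxNum"> The number of positions in glycanIds to combine; combinations of size 1 up to maxNum are generated </param>
/// <param name="glycanIds"> The glycanBox, ex. [0,0,1] means glycan0 + glycan0 + glycan1 </param>
/// <param name="targetDecoy"> Passed to the GlycanBox constructor for every child box; true for target, false for decoy </param>
/// <returns> The collection of child boxes (a sequence of GlycanBox) </returns>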
public static IEnumerable<GlycanBox> BuildChildOGlycanBoxes(int maxNum, int[] glycanIds, bool targetDecoy = true)
{
yield return new GlycanBox(new int[0], targetDecoy);
HashSet<string> seen = new HashSet<string>();
for (int i = 1; i <= maxNum; i++)
{
foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i))
{
List<int> ids = new List<int>();
foreach (var id in idCombine)
foreach (var idCombine in Glycan.GetKCombs(Enumerable.Range(0, maxNum), i)) //get all combinations of glycans on the peptide, ex. the box holds three glycans (A,B,C)
{ //the combinations of glycans on the peptide can be (A),(B),(C),(A+B),(A+C),(B+C),(A+B+C), seven in total
List<int> ids = new List<int>();
foreach (var id in idCombine)
{
ids.Add(glycanIds[id]);
ids.Add(glycanIds[id]);
}

if (!seen.Contains(string.Join(",", ids.Select(p => p.ToString()))))
Expand All @@ -102,19 +120,24 @@ public static IEnumerable<GlycanBox> BuildChildOGlycanBoxes(int maxNum, int[] gl
}
}

public GlycanBox(int[] ids, bool targetDecoy = true):base(ids)
/// <summary>
/// Constructor of GlycanBox.
/// </summary>
/// <param name="ids"> The glycanBox composition, each number represent one glycan index in the database</param>
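/// <param name="Istarget"> True for a target box, whose mass is the mass computed from Kind; false for a decoy box, whose mass is additionally offset by a SugarShift value </param>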
public GlycanBox(int[] ids, bool Istarget = true):base(ids)
{
byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
foreach (var id in ModIds)
byte[] kind = new byte[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
foreach (var id in ModIds) //ModIds is the same as ids.
{
for (int i = 0; i < kind.Length; i++)
for (int i = 0; i < kind.Length; i++)
{
kind[i] += GlobalOGlycans[id].Kind[i];
kind[i] += GlobalOGlycans[id].Kind[i]; //kind is the sum of all glycan Kind in the Box.
}
}
Kind = kind;

if (targetDecoy)
if (Istarget)
{
Mass = (double)Glycan.GetMass(Kind) / 1E5;
}
Expand All @@ -125,18 +148,13 @@ public GlycanBox(int[] ids, bool targetDecoy = true):base(ids)
Mass = (double)(Glycan.GetMass(Kind) + SugarShift[shiftInd]) / 1E5;
}
}

public GlycanBox[] ChildGlycanBoxes { get; set; }

public string GlycanIdString

/// <summary>
/// The composition of the glycanBox as a comma-separated list of its ModIds.
/// Example: "1,2,3" means glycan1 + glycan2 + glycan3 are on the peptide.
/// </summary>
public string GlycanIdString => string.Join(",", ModIds.Select(modId => modId.ToString()));

public byte[] Kind{ get; private set; }

}
}
Loading