Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bug fix Modification Info List which reports the modification stoichiometry in PSM counts and fractions on the full protein sequence #2321

Merged
merged 19 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 14 additions & 10 deletions MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -402,19 +402,23 @@ public void CalculateSequenceCoverage()
if (psm.BaseSequence != null)
{
psm.GetAminoAcidCoverage();
var peptides = psm.BestMatchingPeptides.Select(p => p.Peptide);
foreach (var peptide in peptides)
var peptides = psm.BestMatchingPeptides.Select(p => p.Peptide).ToList();
peptides = peptides.DistinctBy(p => p.FullSequence).ToList();

if (peptides.Any())
{
nbollis marked this conversation as resolved.
Show resolved Hide resolved
// might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
if (Proteins.Contains(peptide.Protein))
foreach (var peptide in peptides)
{
proteinsWithUnambigSeqPsms[peptide.Protein].Add(peptide);
//proteinsWithUnambigSeqPsmsCoverage[peptide.Protein].Add((peptide, psm.FragmentCoveragePositionInPeptide));

// null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
if (psm.FullSequence != null)
// might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
if (Proteins.Contains(peptide.Protein))
{
proteinsWithPsmsWithLocalizedMods[peptide.Protein].Add(peptide);
proteinsWithUnambigSeqPsms[peptide.Protein].Add(peptide);

// null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
if (peptide.FullSequence != null)
{
proteinsWithPsmsWithLocalizedMods[peptide.Protein].Add(peptide);
}
}
}
}
Expand Down
21 changes: 11 additions & 10 deletions MetaMorpheus/EngineLayer/ProteolyticDigestion/proteases.tsv
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
Name Sequences Inducing Cleavage Sequences Preventing Cleavage Cleavage Terminus Cleavage Specificity PSI-MS Accession Number PSI-MS Name Site Regular Expression Cleavage Mass Shifts Notes
Arg-C R| full MS:1001303 Arg-C (?<=R)(?!P)
Asp-N |D full MS:1001304 Asp-N (?=[BD])
chymotrypsin (don't cleave before proline) "F[P]|,W[P]|,Y[P]|" full MS:1001306 Chymotrypsin (?<=[FYWL])(?!P)
chymotrypsin (cleave before proline) "F|,W|,Y|" full MS:1001306 Chymotrypsin (?<=[FYWL])
CNBr M| full MS:1001307 CNBr (?<=M) Homoserine lactone on M
elastase "A|,V|,S|,G|,L|,I|" full Elastase (?<=[AVSGLI])
chymotrypsin (don't cleave before proline) F[P]|,W[P]|,Y[P]| full MS:1001306 Chymotrypsin (?<=[FYWL])(?!P)
chymotrypsin (cleave before proline) F|,W|,Y| full MS:1001306 Chymotrypsin (?<=[FYWL])
CNBr M| full MS:1001307 CNBr (?<=M) Homoserine lactone on M
elastase(don't cleave before proline 15mc) I[P]|,S[P]|,L[P]|,V[P]|,N[P]|,T[P]|,K[P]|,A[P]|,G[P]|,Y[P]|,Q[P]|,R[P]|,F[P]|,D[P]| full Elastase/P (?<=[IPSPLPVPNPTPKPAPGPYPQPRPFD])(?!P)
nbollis marked this conversation as resolved.
Show resolved Hide resolved
Glu-C E| full
Glu-C (with asp) "E|,D|" full
Glu-C (with asp) E|,D| full
Lys-C (don't cleave before proline) K[P]| full MS:1001309 Lys-C (?<=K)(?!P)
Lys-C (cleave before proline) K| full MS:1001310 Lys-C/P (?<=K)
Lys-N |K full
semi-trypsin "K|,R|" semi MS:1001313 Trypsin/P (?<=[KR])
trypsin "K|,R|" full MS:1001313 Trypsin/P (?<=[KR])
semi-trypsin K|,R| semi MS:1001313 Trypsin/P (?<=[KR])
trypsin K|,R| full MS:1001313 Trypsin/P (?<=[KR])
tryptophan oxidation W| full
non-specific X| full MS:1001956 unspecific cleavage
top-down none MS:1001955 no cleavage
top-down none MS:1001955 no cleavage
singleN SingleN MS:1001957 single cleavage
singleC SingleC MS:1001958 single cleavage
peptidomics none no cleavage
collagenase GPX|GPX full
StcE-trypsin "TX|T,TX|S,SX|T,SX|S,K|,R|" full StcE/Trpsin
ProAlanase "P|,A|" full
StcE-trypsin TX|T,TX|S,SX|T,SX|S,K|,R| full StcE/Trpsin
ProAlanase P|,A| full
subtilisin(don't cleave before proline 15mc) N[P]|,S[P]|,L[P]|,K[P]|,I[P]|,D[P]|,Y[P]|,V[P]|,G[P]|,F[P]|,T[P]|,E[P]|,Q[P]|,A[P]|,R[P]| full Subtilisin/P (?<=[NSLKIDYVGFTEQAR])(?!P)
61 changes: 61 additions & 0 deletions MetaMorpheus/Test/ProteinGroupTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
using NUnit.Framework;
using Proteomics;
using System.Collections.Generic;
using System.Linq;
using Proteomics.ProteolyticDigestion;
using MassSpectrometry;
using Chemistry;
using EngineLayer.ClassicSearch;
using FlashLFQ;
using TaskLayer;
using ProteinGroup = EngineLayer.ProteinGroup;
using System.IO;
using UsefulProteomicsDatabases;

namespace Test
{
Expand Down Expand Up @@ -163,6 +168,62 @@ public static void ProteinGroupDisplayModsTestWithGetIdentifiedPeptidesMethod()
//This test just gets some lines in ProteinGroup covered. There is no accessible way to get the output of this method.
Assert.DoesNotThrow(()=>proteinGroup1.GetIdentifiedPeptidesOutput(new List<SilacLabel>()));
}

/// <summary>
/// Runs a GPTMD task followed by a classic search task (parsimony on, pruned database written)
/// and checks both the pruned protein XML and the modification info entry read from
/// AllQuantifiedProteinGroups.tsv.
/// </summary>
[Test]
public static void TestModificationInfoListInProteinGroupsOutput()
{
    // GPTMD task: candidate mods are every known modification of the listed types
    GptmdTask task1 = new GptmdTask
    {
        CommonParameters = new CommonParameters(),
        GptmdParameters = new GptmdParameters
        {
            ListOfModsGptmd = GlobalVariables.AllModsKnown.Where(b =>
                b.ModificationType.Equals("Common Artifact")
                || b.ModificationType.Equals("Common Biological")
                || b.ModificationType.Equals("Metal")
                || b.ModificationType.Equals("Less Common")
            ).Select(b => (b.ModificationType, b.IdWithMotif)).ToList()
        }
    };

    // Search task: classic search with parsimony that also writes the pruned database
    SearchTask task2 = new SearchTask
    {
        CommonParameters = new CommonParameters(),

        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            WritePrunedDatabase = true,
            SearchType = SearchType.Classic
        }
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1), ("task2", task2) };
    string mzmlName = @"TestData\PrunedDbSpectra.mzml";
    string fastaName = @"TestData\DbForPrunedDb.fasta";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration");

    try
    {
        var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
        engine.Run();

        // NOTE(review): MySetUpClass.outputFolder is presumably the folder this run just wrote to
        // (i.e. the same as the local outputFolder) - confirm against the set-up fixture.
        string final = Path.Combine(MySetUpClass.outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml");
        List<Protein> proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out _);

        // ensures that the protein output contains the correct number of proteins to match the following conditions:
        // all proteins in DB have baseSequence != null (not ambiguous)
        // all proteins that belong to a protein group are written to DB
        Assert.AreEqual(18, proteins.Count);

        // tests that modifications are being written correctly: none are expected in the pruned database
        int totalNumberOfMods = proteins.Sum(p => p.OneBasedPossibleLocalizedModifications.Count + p.SequenceVariations.Sum(sv => sv.OneBasedModifications.Count));
        Assert.AreEqual(0, totalNumberOfMods);

        // Index 2 is the third line of the file; column 14 is read as the modification info list.
        string[] proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv"));
        string thirdOutputLine = proteinGroupsOutput[2];
        string modInfoListProteinTwo = thirdOutputLine.Split('\t')[14];
        Assert.AreEqual("#aa66[Hydroxylation on K,info:occupancy=0.33(1/3)];#aa71[Oxidation on S,info:occupancy=0.67(2/3)]", modInfoListProteinTwo);
    }
    finally
    {
        // Always remove the output so a failed assertion does not leave stale files for later runs;
        // the existence check keeps a missing folder from masking the original failure.
        if (Directory.Exists(outputFolder))
        {
            Directory.Delete(outputFolder, true);
        }
    }
}
}
}