Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fix: Modification Info List, which reports the modification stoichiometry in PSM counts and fractions on the full protein sequence #2321

Merged
merged 19 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions MetaMorpheus/EngineLayer/ProteinParsimony/ProteinGroup.cs
Original file line number Diff line number Diff line change
Expand Up @@ -402,22 +402,22 @@ public void CalculateSequenceCoverage()
if (psm.BaseSequence != null)
{
psm.GetAminoAcidCoverage();
var peptides = psm.BestMatchingPeptides.Select(p => p.Peptide);
foreach (var peptide in peptides)

foreach (var peptide in psm.BestMatchingPeptides.Select(psm => psm.Peptide).DistinctBy(pep => pep.FullSequence))
{
nbollis marked this conversation as resolved.
Show resolved Hide resolved
// might be unambiguous but also shared; make sure this protein group contains this peptide+protein combo
if (Proteins.Contains(peptide.Protein))
{
proteinsWithUnambigSeqPsms[peptide.Protein].Add(peptide);
//proteinsWithUnambigSeqPsmsCoverage[peptide.Protein].Add((peptide, psm.FragmentCoveragePositionInPeptide));

// null FullSequence means that mods were not successfully localized; do not display them on the sequence coverage mods info
if (psm.FullSequence != null)
if (peptide.FullSequence != null)
{
proteinsWithPsmsWithLocalizedMods[peptide.Protein].Add(peptide);
}
}
}

}
}

Expand Down
61 changes: 61 additions & 0 deletions MetaMorpheus/Test/ProteinGroupTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@
using NUnit.Framework;
using Proteomics;
using System.Collections.Generic;
using System.Linq;
using Proteomics.ProteolyticDigestion;
using MassSpectrometry;
using Chemistry;
using EngineLayer.ClassicSearch;
using FlashLFQ;
using TaskLayer;
using ProteinGroup = EngineLayer.ProteinGroup;
using System.IO;
using UsefulProteomicsDatabases;

namespace Test
{
Expand Down Expand Up @@ -163,6 +168,62 @@ public static void ProteinGroupDisplayModsTestWithGetIdentifiedPeptidesMethod()
//This test just gets some lines in ProteinGroup covered. There is no accessible way to get the output of this method.
Assert.DoesNotThrow(()=>proteinGroup1.GetIdentifiedPeptidesOutput(new List<SilacLabel>()));
}

/// <summary>
/// Runs a GPTMD task followed by a classic search task (parsimony and pruned-database writing
/// enabled) on the PrunedDb test data, then checks the proteins loaded from the pruned XML
/// database and the modification info list written to AllQuantifiedProteinGroups.tsv.
/// </summary>
[Test]
public static void TestModificationInfoListInProteinGroupsOutput()
{
    //Create GPTMD Task
    GptmdTask task1 = new GptmdTask
    {
        CommonParameters = new CommonParameters(),
        GptmdParameters = new GptmdParameters
        {
            ListOfModsGptmd = GlobalVariables.AllModsKnown.Where(b =>
                b.ModificationType.Equals("Common Artifact")
                || b.ModificationType.Equals("Common Biological")
                || b.ModificationType.Equals("Metal")
                || b.ModificationType.Equals("Less Common")
            ).Select(b => (b.ModificationType, b.IdWithMotif)).ToList()
        }
    };

    //Create Search Task
    SearchTask task2 = new SearchTask
    {
        CommonParameters = new CommonParameters(),

        SearchParameters = new SearchParameters
        {
            DoParsimony = true,
            SearchTarget = true,
            WritePrunedDatabase = true,
            SearchType = SearchType.Classic
        }
    };

    List<(string, MetaMorpheusTask)> taskList = new List<(string, MetaMorpheusTask)> { ("task1", task1), ("task2", task2) };
    string mzmlName = @"TestData\PrunedDbSpectra.mzml";
    string fastaName = @"TestData\DbForPrunedDb.fasta";
    string outputFolder = Path.Combine(TestContext.CurrentContext.TestDirectory, @"TestPrunedGeneration");

    // try/finally so the output folder is removed even when an assertion (or the run itself) fails;
    // otherwise a failed run would leave its files behind for later runs to pick up.
    try
    {
        var engine = new EverythingRunnerEngine(taskList, new List<string> { mzmlName }, new List<DbForTask> { new DbForTask(fastaName, false) }, outputFolder);
        engine.Run();

        // Read the pruned database from the same local task2 folder that the protein groups
        // file is read from below, rather than going through shared static state.
        string final = Path.Combine(outputFolder, "task2", "DbForPrunedDbGPTMDproteinPruned.xml");
        List<Protein> proteins = ProteinDbLoader.LoadProteinXML(final, true, DecoyType.Reverse, new List<Modification>(), false, new List<string>(), out _);

        // ensures that protein output contains the correct number of proteins to match the following conditions.
        // all proteins in DB have baseSequence!=null (not ambiguous)
        // all proteins that belong to a protein group are written to DB
        Assert.AreEqual(18, proteins.Count);
        int totalNumberOfMods = proteins.Sum(p => p.OneBasedPossibleLocalizedModifications.Count + p.SequenceVariations.Sum(sv => sv.OneBasedModifications.Count));

        //tests that modifications are being done correctly
        Assert.AreEqual(0, totalNumberOfMods);

        // Row index 2 / column index 14 of the tab-separated protein groups file is where this
        // test expects the modification info list (occupancy as fraction and PSM counts).
        // NOTE(review): presumably rows 0-1 are the header and the first protein group — confirm.
        List<string> proteinGroupsOutput = File.ReadAllLines(Path.Combine(outputFolder, "task2", "AllQuantifiedProteinGroups.tsv")).ToList();
        string firstDataLine = proteinGroupsOutput[2];
        string modInfoListProteinTwo = firstDataLine.Split('\t')[14];
        Assert.AreEqual("#aa66[Hydroxylation on K,info:occupancy=0.33(1/3)];#aa71[Oxidation on S,info:occupancy=0.67(2/3)]", modInfoListProteinTwo);
    }
    finally
    {
        // Guarded so a failure before the folder exists is not masked by a DirectoryNotFoundException.
        if (Directory.Exists(outputFolder))
        {
            Directory.Delete(outputFolder, true);
        }
    }
}
}
}