Skip to content

Commit

Permalink
Fix string split in glyco protein parsimony (#2305)
Browse files Browse the repository at this point in the history
* reverted filtering method for psms passed to FlashLFQ

* a separator that works

* replace hashtag with tuple

* add unit test

* add orderby

---------

Co-authored-by: Alex <AlexSolivais@gmail.com>
Co-authored-by: MICHAEL SHORTREED <mrshortreed@wisc.edu>
  • Loading branch information
3 people authored Sep 7, 2023
1 parent 20713d6 commit b160b85
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ public GlycoProteinParsimony(string proteinAccess, int proteinPos, char aminoAci

public double MaxProbability { get; set; }

public static Dictionary<string, GlycoProteinParsimony> ProteinLevelGlycoParsimony(List<GlycoSpectralMatch> allPsmsGly)
public static Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony> ProteinLevelGlycoParsimony(List<GlycoSpectralMatch> allPsmsGly)
{
//key: proPosId
Dictionary<string, GlycoProteinParsimony> localizedGlycan = new Dictionary<string, GlycoProteinParsimony>();
Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony> localizedGlycan = new Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony>();

foreach (var gsm in allPsmsGly)
{
Expand All @@ -59,7 +59,7 @@ public static Dictionary<string, GlycoProteinParsimony> ProteinLevelGlycoParsimo
{
int proteinPos = local.Item1 + gsm.OneBasedStartResidueInProtein.Value - 2;

string proPosId = gsm.ProteinAccession + "-" + proteinPos.ToString() + "-" + local.Item2;
(string,string,int) proPosId = new (gsm.ProteinAccession, proteinPos.ToString(), local.Item2);

double prob = -1;
if (gsm.SiteSpeciLocalProb != null && gsm.SiteSpeciLocalProb.ContainsKey(local.Item1))
Expand Down
42 changes: 22 additions & 20 deletions MetaMorpheus/TaskLayer/XLSearchTask/WriteFile.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.IO;
using System.Linq;
using System.Xml.Serialization;
using Easy.Common.Extensions;

namespace TaskLayer
{
Expand Down Expand Up @@ -452,50 +453,51 @@ public static void WritePsmGlycoToTsv(List<GlycoSpectralMatch> items, string fil
}

//Summarizes localized glycans by protein site.
public static void WriteSeenProteinGlycoLocalization(Dictionary<string, GlycoProteinParsimony> glycoProteinParsimony, string outputPath)
public static void WriteSeenProteinGlycoLocalization(Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony> glycoProteinParsimony, string outputPath)
{
if (glycoProteinParsimony.Count == 0)
if (glycoProteinParsimony.Keys.Count == 0)
{ return; }
var writtenFile = Path.Combine(outputPath);
using (StreamWriter output = new StreamWriter(writtenFile))
{
output.WriteLine("Protein Accession\tModification Site\tAminoAcid\tLocalized Glycans\tLocalized\tLowest Qvalue\tBest Localization Level\tMax Site Specific Probability");
foreach (var item in glycoProteinParsimony.OrderBy(p=>p.Key))
foreach (var item in glycoProteinParsimony.OrderBy(i=>i.Key.proteinAccession))
{
var x = item.Key.Split('-');
output.WriteLine(
x[0] + "\t" +
x[1] + "\t" +
item.Value.AminoAcid + "\t" +
GlycanBox.GlobalOGlycans[int.Parse(x[2])].Composition + "\t" +
item.Value.IsLocalized + "\t" +
item.Value.MinQValue.ToString("0.000") + "\t" +
item.Value.BestLocalizeLevel + "\t" +
item.Value.MaxProbability.ToString("0.000")
);
if (item.Value != null)
{
output.WriteLine(
item.Key.proteinAccession + "\t" +
item.Key.proteinPosition + "\t" +
item.Value.AminoAcid + "\t" +
GlycanBox.GlobalOGlycans[item.Key.glycanId].Composition + "\t" +
item.Value.IsLocalized + "\t" +
item.Value.MinQValue.ToString("0.000") + "\t" +
item.Value.BestLocalizeLevel + "\t" +
item.Value.MaxProbability.ToString("0.000"));

}
}
}
}

//Summarizes the localized glycosylation of each protein site.
public static void WriteProteinGlycoLocalization(Dictionary<string, GlycoProteinParsimony> glycoProteinParsimony, string outputPath)
public static void WriteProteinGlycoLocalization(Dictionary<(string proteinAccession, string proteinPosition, int glycanId), GlycoProteinParsimony> glycoProteinParsimony, string outputPath)
{
if (glycoProteinParsimony.Count == 0)
{ return; }

Dictionary<string, HashSet<string>> localizedglycans = new Dictionary<string, HashSet<string>>();
foreach (var item in glycoProteinParsimony.Where(p=>p.Value.IsLocalized && p.Value.MinQValue <= 0.01))
{
var x = item.Key.Split('-');
var key = x[0] + "-" + x[1];
var key = item.Key.proteinAccession + "#" + item.Key.proteinPosition;
if ( localizedglycans.ContainsKey(key))
{
localizedglycans[key].Add(x[2]);
localizedglycans[key].Add(item.Key.glycanId.ToString());
}
else
{
localizedglycans[key] = new HashSet<string>();
localizedglycans[key].Add(x[2]);
localizedglycans[key].Add(item.Key.glycanId.ToString());
}

}
Expand All @@ -506,7 +508,7 @@ public static void WriteProteinGlycoLocalization(Dictionary<string, GlycoProtein
output.WriteLine("Protein Accession\tModification Site\tLocalized Glycan Number\tLocalized Glycans");
foreach (var local in localizedglycans.OrderBy(p => p.Key))
{
var x = local.Key.Split('-');
var x = local.Key.Split('#');
output.WriteLine(
x[0] + "\t" +
x[1] + "\t" +
Expand Down
8 changes: 8 additions & 0 deletions MetaMorpheus/Test/GlycoTestData/P16150withHyphenInName.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>sp|P16150|LEUK_HUMAN Leuko-sialin OS=Homo sapiens OX=9606 GN=SPN PE=1 SV=1
MATLLLLLGVLVVSPDALGSTTAVQTPTSGEPLVSTSEPLSSKMYTTSITSDPKADSTGD
QTSALPPSTSINEGSPLWTSIGASTGSPLPEPTTYQEVSIKMSSVPQETPHATSHPAVPI
TANSLGSHTVTGGTITTNSPETSSRTSGAPVTTAASSLETSRGTSGPPLTMATVSLETSK
GTSGPPVTMATDSLETSTGTTGPPVTMTTGSLEPSSGASGPQVSSVKLSTMMSPTTSTNA
STVPFRNPDENSRGMLPVAVLVALLAVIVLVALLLLWRRRQKRRTGALVLSRGGKRNGVV
DAWAGPAQVPEEGAVTVTVGGSGGDKGSGFPDGEGSSRRPTLTTFFGRRKSRQGSLAMEE
LKSGSGPSLKGEEEPLVASEDGAVDAPAPDEPEGGDGAAP
3 changes: 3 additions & 0 deletions MetaMorpheus/Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@
<None Update="GlycoTestData\P02649.fasta">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="GlycoTestData\P16150withHyphenInName.fasta">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="GlycoTestData\P16150.fasta">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
Expand Down
12 changes: 0 additions & 12 deletions MetaMorpheus/Test/XLTestNGlyco.cs
Original file line number Diff line number Diff line change
Expand Up @@ -63,18 +63,6 @@ public static void GlyTest_GlyGetTheoreticalFragments()
var sites = GlycoSpectralMatch.GetPossibleModSites(aPeptideWithSetModifications.Last(), motifs);
Glycan glycan = Glycan.Struct2Glycan("(N(F)(N(H(H(N))(H(N)))))", 0);


//using (StreamWriter output = new StreamWriter(Path.Combine(TestContext.CurrentContext.TestDirectory, "GlycanFragmentions.txt")))
//{
// foreach (var product in fragmentIons)
// {
// foreach (var ion in product.Item2)
// {
// output.WriteLine(ion.Annotation + "\t" + ion.NeutralLoss.ToString() + "\t" + ion.NeutralMass.ToString());
// }
// }
//}

CommonParameters commonParameters = new CommonParameters(deconvolutionMassTolerance: new PpmTolerance(20), trimMsMsPeaks: false);
string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, @"GlycoTestData/Glyco_3383.mgf"); //"25170.mgf"
MyFileManager myFileManager = new MyFileManager(true);
Expand Down
26 changes: 21 additions & 5 deletions MetaMorpheus/Test/XLTestOGlyco.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
using Chemistry;
using EngineLayer;
using EngineLayer.CrosslinkSearch;
using EngineLayer.Indexing;
using EngineLayer;
using MassSpectrometry;
using NUnit.Framework;
using Proteomics;
Expand All @@ -13,7 +10,6 @@
using System.Linq;
using TaskLayer;
using UsefulProteomicsDatabases;
using MzLibUtil;
using Nett;
using EngineLayer.GlycoSearch;

Expand Down Expand Up @@ -443,6 +439,26 @@ public static void OGlycoTest_Run3()
Directory.Delete(Path.Combine(Environment.CurrentDirectory, @"TESTGlycoData"), true);
}

/// <summary>
/// Regression test: makes sure that hyphens in protein names don't produce a crash during
/// protein inference from glycopeptides. Runs the O-glyco search task against a database
/// whose protein name contains a hyphen and checks that a protein-groups file is written.
/// </summary>
[Test]
public static void OGlycoTest_Run4()
{
    string testDirectory = TestContext.CurrentContext.TestDirectory;
    string outputFolder = Path.Combine(testDirectory, "TESTGlycoData");
    Directory.CreateDirectory(outputFolder);

    try
    {
        // Path segments are passed separately so the test does not depend on the Windows '\' separator.
        var glycoSearchTask = Toml.ReadFile<GlycoSearchTask>(Path.Combine(testDirectory, "GlycoTestData", "GlycoSearchTaskconfigOGlycoTest_Run.toml"), MetaMorpheusTask.tomlConfig);

        DbForTask db = new(Path.Combine(testDirectory, "GlycoTestData", "P16150withHyphenInName.fasta"), false);
        string spectraFile = Path.Combine(testDirectory, "GlycoTestData", "2019_09_16_StcEmix_35trig_EThcD25_rep1_9906.mgf");
        new EverythingRunnerEngine(new List<(string, MetaMorpheusTask)> { ("Task", glycoSearchTask) }, new List<string> { spectraFile }, new List<DbForTask> { db }, outputFolder).Run();

        // Directory.GetDirectories/GetFiles do not guarantee any ordering, so the expected
        // output is located by name instead of by a fixed index into the listing.
        var outputFiles = Directory.GetDirectories(outputFolder)
            .SelectMany(folder => Directory.GetFiles(folder))
            .ToList();
        Assert.That(outputFiles.Any(file => Path.GetFileName(file).Contains("_AllProteinGroups.tsv")));
    }
    finally
    {
        // Clean up even when the search or the assertion fails, so that a failure here
        // does not leave stale output behind for other tests.
        Directory.Delete(outputFolder, true);
    }
}

[Test]
public static void OGlycoTest_GetLeft()
{
Expand Down

0 comments on commit b160b85

Please sign in to comment.