Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gptmd approach update #2419

Open
wants to merge 28 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
bd5aefc
update mzlib nuget package to 551
trishorts Aug 26, 2024
d60e32e
Merge remote-tracking branch 'upstream/master'
trishorts Sep 5, 2024
5e65835
use psms score to revise what ptms are added in gptmd
trishorts Sep 18, 2024
dd2cbfa
now deals w/ variants
trishorts Sep 19, 2024
7a7dad6
fix unit tests
trishorts Sep 19, 2024
22fb336
Merge remote-tracking branch 'upstream/master' into gptmdApproachUpdate
trishorts Sep 19, 2024
8245a0e
unused lines
trishorts Sep 19, 2024
ce6c4e1
load files in background
trishorts Sep 19, 2024
ef44e03
add parallelization and eliminate pep
trishorts Sep 20, 2024
f33313b
fix unit tests
trishorts Sep 20, 2024
5a3445b
yo
trishorts Sep 20, 2024
2dcc22c
fix those unit tests
trishorts Sep 20, 2024
b7371d0
fix unit test
trishorts Sep 20, 2024
67aa1d2
test gptmd DissociationType Autodetect
trishorts Sep 23, 2024
bf0401d
gptmd test mod before variant
trishorts Sep 24, 2024
c44a719
new test
trishorts Sep 24, 2024
40abab6
Merge branch 'master' into gptmdApproachUpdate
trishorts Sep 24, 2024
0646bb4
Merge branch 'master' into gptmdApproachUpdate
trishorts Sep 24, 2024
96268d0
delete unreachable code
trishorts Sep 27, 2024
63c6fff
unit test for xcorr process spectra and matched ion with unknown mass
trishorts Sep 27, 2024
71c2274
Test MatchFragmentIons when scan has no peaks
trishorts Sep 27, 2024
46f902e
test gptmd task with contaminant database
trishorts Sep 27, 2024
cf83389
return missing comments
trishorts Sep 27, 2024
aed2aba
Merge branch 'gptmdApproachUpdate' of https://github.com/trishorts/Me…
trishorts Sep 27, 2024
388cc0b
Merge branch 'master' into gptmdApproachUpdate
trishorts Sep 27, 2024
04c3ed8
more unit test coverage for modification analysis engine
trishorts Sep 30, 2024
8d48e53
Merge branch 'gptmdApproachUpdate' of https://github.com/trishorts/Me…
trishorts Sep 30, 2024
51b1c58
Merge branch 'master' into gptmdApproachUpdate
trishorts Oct 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
207 changes: 143 additions & 64 deletions MetaMorpheus/EngineLayer/Gptmd/GptmdEngine.cs

Large diffs are not rendered by default.

7 changes: 1 addition & 6 deletions MetaMorpheus/EngineLayer/MetaMorpheusEngine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,6 @@ public static List<MatchedFragmentIon> MatchFragmentIons(Ms2ScanWithSpecificMass

if (scan.TheScan.MassSpectrum.XcorrProcessed && scan.TheScan.MassSpectrum.XArray.Length != 0)
{
// if the spectrum has no peaks
if (scan.TheScan.MassSpectrum.XArray.Length == 0)
{
return matchedFragmentIons;
}

for (int i = 0; i < theoreticalProducts.Count; i++)
{
Expand Down Expand Up @@ -225,7 +220,7 @@ public static List<MatchedFragmentIon> MatchFragmentIons(Ms2ScanWithSpecificMass

return matchedFragmentIons;
}

//Used only when user wants to generate spectral library.
//Normal search only looks for one match ion for one fragment, and if it accepts it then it doesn't try to look for different charge states of that same fragment.
//But for library generation, we need to find all the matched peaks with all the different charges.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ protected override MetaMorpheusEngineResults RunSpecific()
if (unlocalizedFormulas.ContainsKey(representativePsm.ModsChemicalFormula))
unlocalizedFormulas[representativePsm.ModsChemicalFormula] += 1;
else
unlocalizedFormulas.Add(representativePsm.ModsChemicalFormula, 1);
unlocalizedFormulas.Add(representativePsm.ModsChemicalFormula, 1);
}

myAnalysisResults.CountOfEachModSeenOnProteins = modsOnProteins.GroupBy(b => b.Item2).ToDictionary(b => b.Key, b => b.Count());
Expand Down
67 changes: 48 additions & 19 deletions MetaMorpheus/TaskLayer/GPTMDTask/GPTMDTask.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
using Proteomics.ProteolyticDigestion;
using System.Globalization;
using Omics.Modifications;
using System.Threading.Tasks;

namespace TaskLayer
{
Expand All @@ -30,14 +31,30 @@ public GptmdTask() : base(MyTask.Gptmd)

protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask> dbFilenameList, List<string> currentRawFileList, string taskId, FileSpecificParameters[] fileSettingsList)
{
MyFileManager myFileManager = new MyFileManager(true);
var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));

// start loading first spectra file in the background
Task<MsDataFile> nextFileLoadingTask = new(() => myFileManager.LoadFile(currentRawFileList[0], SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[0])));
nextFileLoadingTask.Start();
LoadModifications(taskId, out var variableModifications, out var fixedModifications, out var localizeableModificationTypes);

// start loading proteins in the background
List<Protein> proteinList = null;
Task<List<Protein>> proteinLoadingTask = new(() =>
{
var proteins = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse,
localizeableModificationTypes,
CommonParameters);
SanitizeProteinDatabase(proteins, TargetContaminantAmbiguity.RemoveContaminant);
return proteins;
});
proteinLoadingTask.Start();

// TODO: print error messages loading GPTMD mods
List<Modification> gptmdModifications = GlobalVariables.AllModsKnown.OfType<Modification>().Where(b => GptmdParameters.ListOfModsGptmd.Contains((b.ModificationType, b.IdWithMotif))).ToList();
IEnumerable<Tuple<double, double>> combos = LoadCombos(gptmdModifications).ToList();

// load proteins
List<Protein> proteinList = LoadProteins(taskId, dbFilenameList, true, DecoyType.Reverse, localizeableModificationTypes, CommonParameters);

List<SpectralMatch> allPsms = new List<SpectralMatch>();

Expand All @@ -62,18 +79,13 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
ProseCreatedWhileRunning.Append("precursor mass tolerance(s) = {" + tempSearchMode.ToProseString() + "}; ");

ProseCreatedWhileRunning.Append("product mass tolerance = " + CommonParameters.ProductMassTolerance + ". ");
ProseCreatedWhileRunning.Append("The combined search database contained " + proteinList.Count(p => !p.IsDecoy) + " non-decoy protein entries including " + proteinList.Where(p => p.IsContaminant).Count() + " contaminant sequences. ");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason we got rid of this prose line?


// start the G-PTM-D task
Status("Running G-PTM-D...", new List<string> { taskId });
MyTaskResults = new MyTaskResults(this)
{
NewDatabases = new List<DbForTask>()
};
var fileSpecificCommonParams = fileSettingsList.Select(b => SetAllFileSpecificCommonParams(CommonParameters, b));
HashSet<DigestionParams> ListOfDigestionParams = new HashSet<DigestionParams>(fileSpecificCommonParams.Select(p => p.DigestionParams));

MyFileManager myFileManager = new MyFileManager(true);

object lock1 = new object();
object lock2 = new object();
Expand All @@ -94,14 +106,39 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
NewCollection(Path.GetFileName(origDataFile), new List<string> { taskId, "Individual Spectra Files", origDataFile });

Status("Loading spectra file...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
MsDataFile myMsDataFile = myFileManager.LoadFile(origDataFile, combinedParams);

// ensure that the next file has finished loading from the async method
nextFileLoadingTask.Wait();
var myMsDataFile = nextFileLoadingTask.Result;
// if another file exists, then begin loading it in while the previous is being searched
if (origDataFile != currentRawFileList.Last())
{
int nextFileIndex = spectraFileIndex + 1;
nextFileLoadingTask = new Task<MsDataFile>(() => myFileManager.LoadFile(currentRawFileList[nextFileIndex], SetAllFileSpecificCommonParams(CommonParameters, fileSettingsList[nextFileIndex])));
nextFileLoadingTask.Start();
}
Status("Getting ms2 scans...", new List<string> { taskId, "Individual Spectra Files", origDataFile });
Ms2ScanWithSpecificMass[] arrayOfMs2ScansSortedByMass = GetMs2Scans(myMsDataFile, origDataFile, combinedParams).OrderBy(b => b.PrecursorMass).ToArray();
myFileManager.DoneWithFile(origDataFile);
SpectralMatch[] allPsmsArray = new PeptideSpectralMatch[arrayOfMs2ScansSortedByMass.Length];

//spectral library search and library generation haven't been applied to GPTMD yet
bool writeSpctralLibrary = false;

// ensure proteins are loaded in before proceeding with search
switch (proteinLoadingTask.IsCompleted)
{
case true when proteinList is null: // has finished loading but not been set
proteinList = proteinLoadingTask.Result;
break;
case true when proteinList.Any(): // has finished loading and already been set
break;
case false: // has not finished loading
proteinLoadingTask.Wait();
proteinList = proteinLoadingTask.Result;
break;
}

new ClassicSearchEngine(allPsmsArray, arrayOfMs2ScansSortedByMass, variableModifications, fixedModifications, null, null, null,
proteinList, searchMode, combinedParams, this.FileSpecificParameters, null, new List<string> { taskId, "Individual Spectra Files", origDataFile }, writeSpctralLibrary).Run();
allPsms.AddRange(allPsmsArray.Where(p => p != null));
Expand All @@ -110,18 +147,8 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
}
ReportProgress(new ProgressEventArgs(100, "Done!", new List<string> { taskId, "Individual Spectra Files" }));

allPsms = allPsms.OrderByDescending(b => b.Score)
.ThenBy(b => b.BioPolymerWithSetModsMonoisotopicMass.HasValue ? Math.Abs(b.ScanPrecursorMass - b.BioPolymerWithSetModsMonoisotopicMass.Value) : double.MaxValue)
.GroupBy(b => new Tuple<string, int, double?>(b.FullFilePath, b.ScanNumber, b.BioPolymerWithSetModsMonoisotopicMass))
.Select(b => b.First()).ToList();

new FdrAnalysisEngine(allPsms, tempSearchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }).Run();

var writtenFile = Path.Combine(OutputFolder, "GPTMD_Candidates.psmtsv");
WritePsmsToTsv(allPsms, writtenFile, new Dictionary<string, int>());
FinishedWritingFile(writtenFile, new List<string> { taskId });
new FdrAnalysisEngine(allPsms.OrderBy(p=>p).ToList(), tempSearchMode.NumNotches, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }, doPEP: false).Run();

// get file-specific precursor mass tolerances for the GPTMD engine
var filePathToPrecursorMassTolerance = new Dictionary<string, Tolerance>();
for (int i = 0; i < currentRawFileList.Count; i++)
{
Expand All @@ -135,6 +162,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
}

// run GPTMD engine
Status("Creating the GPTMD Database", new List<string> { taskId });
var gptmdResults = (GptmdResults)new GptmdEngine(allPsms, gptmdModifications, combos, filePathToPrecursorMassTolerance, CommonParameters, this.FileSpecificParameters, new List<string> { taskId }).Run();

// Stop if canceled
Expand Down Expand Up @@ -188,6 +216,7 @@ protected override MyTaskResults RunSpecific(string OutputFolder, List<DbForTask
MyTaskResults.AddTaskSummaryText("Mods types and counts:");
MyTaskResults.AddTaskSummaryText(string.Join(Environment.NewLine, newModsActuallyWritten.OrderByDescending(b => b.Value).Select(b => "\t" + b.Key + "\t" + b.Value)));
}
Status("Done", new List<string> { taskId });
return MyTaskResults;
}

Expand Down
38 changes: 4 additions & 34 deletions MetaMorpheus/Test/CustomFragmentationTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,9 @@ public static void CustomFragmentationManyTasks()
CollectionAssert.AreEquivalent(customIons, loadedSearchTask.CommonParameters.CustomIons);
Assert.That(loadedSearchTask.CommonParameters.DissociationType, Is.EqualTo(DissociationType.Custom));

// read gptmd and search results to ensure matched ions are correct
var gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out List<string> warnings);
var searchResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search", "AllPSMs.psmtsv"), out List<string> warnings);
Assert.That(!warnings.Any());
var productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

var searchResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
var productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

Expand All @@ -156,13 +149,6 @@ public static void CustomFragmentationManyTasks()
CollectionAssert.AreEquivalent(customIons, loadedSearchTask.CommonParameters.CustomIons);
Assert.That(loadedSearchTask.CommonParameters.DissociationType, Is.EqualTo(DissociationType.Custom));

// read gptmd and search results to ensure matched ions are correct
gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons, productIons);

searchResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "Search", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
Expand Down Expand Up @@ -298,16 +284,9 @@ public static void CustomFragmentIonsManySearchTasksContainingDifferentIons()
CollectionAssert.AreEquivalent(customIons3, loadedSearchTask3.CommonParameters.CustomIons);
Assert.That(loadedSearchTask3.CommonParameters.DissociationType == DissociationType.Custom);

// read gptmd and search results to ensure matched ions are correct
var gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"), out List<string> warnings);
var searchResults1 = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search1", "AllPSMs.psmtsv"), out List<string> warnings);
Assert.That(!warnings.Any());
var productIons = gptmdResults.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons2, productIons);

var searchResults1 = PsmTsvReader.ReadTsv(Path.Combine(outputFolder, "Search1", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
productIons = searchResults1.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
var productIons = searchResults1.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons1, productIons);

Expand Down Expand Up @@ -365,15 +344,6 @@ public static void CustomFragmentIonsManySearchTasksContainingDifferentIons()
CollectionAssert.AreEquivalent(customIons3, loadedSearchTask3.CommonParameters.CustomIons);
Assert.That(loadedSearchTask3.CommonParameters.DissociationType == DissociationType.Custom);

// read gptmd and search results to ensure matched ions are correct
gptmdResults = PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "GPTMD", "GPTMD_Candidates.psmtsv"),
out warnings);
Assert.That(!warnings.Any());
productIons = gptmdResults
.SelectMany(p => p.MatchedIons.Select(m => m.NeutralTheoreticalProduct.ProductType))
.Distinct();
CollectionAssert.AreEquivalent(customIons2, productIons);

searchResults1 =
PsmTsvReader.ReadTsv(Path.Combine(newOutputFolder, "Search1", "AllPSMs.psmtsv"), out warnings);
Assert.That(!warnings.Any());
Expand Down
Loading