From 95b31357d6c975ffd369dc915d27935906301fea Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 29 Jul 2024 11:43:40 -0500 Subject: [PATCH] Added QValueThresholdForPEP to common params --- MetaMorpheus/EngineLayer/CommonParameters.cs | 8 ++++ .../FdrAnalysis/PEPValueAnalysisGeneric.cs | 38 +++++++------------ MetaMorpheus/Test/SpectralRecoveryTest.cs | 14 ++++--- 3 files changed, 30 insertions(+), 30 deletions(-) diff --git a/MetaMorpheus/EngineLayer/CommonParameters.cs b/MetaMorpheus/EngineLayer/CommonParameters.cs index 25dc9370e..335749707 100644 --- a/MetaMorpheus/EngineLayer/CommonParameters.cs +++ b/MetaMorpheus/EngineLayer/CommonParameters.cs @@ -34,6 +34,7 @@ public CommonParameters( int totalPartitions = 1, double qValueThreshold = 0.01, double pepQValueThreshold = 1.0, + double qValueCutoffForPepCalculation = 0.005, double scoreCutoff = 5, int? numberOfPeaksToKeepPerWindow = 200, double? minimumAllowedIntensityRatioToBasePeak = 0.01, @@ -67,6 +68,7 @@ public CommonParameters( TotalPartitions = totalPartitions; QValueThreshold = qValueThreshold; PepQValueThreshold = pepQValueThreshold; + QValueCutoffForPepCalculation = qValueCutoffForPepCalculation; ScoreCutoff = scoreCutoff; NumberOfPeaksToKeepPerWindow = numberOfPeaksToKeepPerWindow; MinimumAllowedIntensityRatioToBasePeak = minimumAllowedIntensityRatioToBasePeak; @@ -157,6 +159,11 @@ public int DeconvolutionMaxAssumedChargeState /// public double PepQValueThreshold { get; private set; } public double ScoreCutoff { get; private set; } + /// + /// This parameter determines which PSMs/Peptides will be used as positive training examples + /// when training the GBDT model for PEP. + /// + public double QValueCutoffForPepCalculation { get; private set; } public DigestionParams DigestionParams { get; private set; } public bool ReportAllAmbiguity { get; private set; } public int? NumberOfPeaksToKeepPerWindow { get; private set; } @@ -225,6 +232,7 @@ public CommonParameters CloneWithNewTerminus(FragmentationTerminus? 
terminus = n TotalPartitions, QValueThreshold, PepQValueThreshold, + QValueCutoffForPepCalculation, ScoreCutoff, NumberOfPeaksToKeepPerWindow, MinimumAllowedIntensityRatioToBasePeak, diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs index a1781910a..00aae1d23 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs @@ -54,36 +54,26 @@ public static string ComputePEPValuesForAllPSMsGeneric(List psms, .Select(b => b.FirstOrDefault()).ToList(); List countOfPeptidesInEachFile = peptides.GroupBy(b => b.FullFilePath).Select(b => b.Count()).ToList(); bool allFilesContainPeptides = (countOfPeptidesInEachFile.Count == fileSpecificParameters.Count); //rare condition where each file has psms but some files don't have peptides. probably only happens in unit tests. + QValueCutoff = fileSpecificParameters.Select(t => t.fileSpecificParameters.QValueCutoffForPepCalculation).Min(); int chargeStateMode = 0; - int numberOfPositiveTrainingExamples = 0; Dictionary fileSpecificMedianFragmentMassErrors = new Dictionary(); - while (numberOfPositiveTrainingExamples < 10) + if (peptides.Count() > 100 && allFilesContainPeptides) { - if (peptides.Count() > 100 && allFilesContainPeptides) - { - foreach (var peptide in peptides) - { - allPeptideIndices.Add(psms.IndexOf(peptide)); - } - chargeStateMode = GetChargeStateMode(peptides); - fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides); - numberOfPositiveTrainingExamples = peptides.Count(peptide => peptide.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff); - } - else + foreach (var peptide in peptides) { - //there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead. 
- UsePeptideLevelQValueForTraining = false; - numberOfPositiveTrainingExamples = psms.Count(psm => psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff); - allPeptideIndices = Enumerable.Range(0, psms.Count).ToList(); - chargeStateMode = GetChargeStateMode(psms); - fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms); - } - - if (numberOfPositiveTrainingExamples < 10) - { - QValueCutoff = QValueCutoff * 2; + allPeptideIndices.Add(psms.IndexOf(peptide)); } + chargeStateMode = GetChargeStateMode(peptides); + fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides); + } + else + { + //there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead. + UsePeptideLevelQValueForTraining = false; + allPeptideIndices = Enumerable.Range(0, psms.Count).ToList(); + chargeStateMode = GetChargeStateMode(psms); + fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms); } diff --git a/MetaMorpheus/Test/SpectralRecoveryTest.cs b/MetaMorpheus/Test/SpectralRecoveryTest.cs index af746dad4..f72742896 100644 --- a/MetaMorpheus/Test/SpectralRecoveryTest.cs +++ b/MetaMorpheus/Test/SpectralRecoveryTest.cs @@ -49,14 +49,16 @@ public void SpectralRecoveryTestSetup() string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\HumanFastaSlice.fasta"); proteinList = ProteinDbLoader.LoadProteinFasta(databasePath, true, DecoyType.Reverse, false, out List errors) .Where(protein => protein.AppliedSequenceVariations != null).ToList(); + CommonParameters commonParameters = new CommonParameters(); + foreach (PsmFromTsv readPsm in tsvPsms.Where(psm => !psm.FullSequence.Contains('['))) // Modifications break the parser { string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", "SpectralRecoveryTest", readPsm.FileNameWithoutExtension + ".mzML"); - MsDataScan scan = 
myFileManager.LoadFile(filePath, new CommonParameters()).GetOneBasedScan(readPsm.Ms2ScanNumber); + MsDataScan scan = myFileManager.LoadFile(filePath, commonParameters).GetOneBasedScan(readPsm.Ms2ScanNumber); Ms2ScanWithSpecificMass ms2Scan = new Ms2ScanWithSpecificMass(scan, readPsm.PrecursorMz, readPsm.PrecursorCharge, - filePath, new CommonParameters()); + filePath, commonParameters); Protein protein = proteinList.First(protein => protein.Accession == readPsm.ProteinAccession); //string[] startAndEndResidues = readPsm.StartAndEndResiduesInProtein.Split(" "); @@ -99,7 +101,7 @@ public void SpectralRecoveryTestSetup() MassDiffAcceptorType = MassDiffAcceptorType.ThreeMM, WriteHighQValuePsms = true }, - CommonParameters = new CommonParameters() + CommonParameters = new CommonParameters(qValueCutoffForPepCalculation: 0.01) }; searchTaskResults = searchTask.RunTask(outputFolder, databaseList, rawSlices, "name"); @@ -130,10 +132,10 @@ public void SpectralRecoveryTestSetup() QuantifyPpmTol = 25 } }, - CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect), + CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect, qValueCutoffForPepCalculation: 0.01), FileSpecificParameters = new List<(string FileName, CommonParameters Parameters)> { - (rawSlices[0], new CommonParameters()), - (rawSlices[1], new CommonParameters()) + (rawSlices[0], new CommonParameters(qValueCutoffForPepCalculation: 0.01)), + (rawSlices[1], new CommonParameters(qValueCutoffForPepCalculation: 0.01)) } };