diff --git a/MetaMorpheus/EngineLayer/CommonParameters.cs b/MetaMorpheus/EngineLayer/CommonParameters.cs
index 25dc9370e..335749707 100644
--- a/MetaMorpheus/EngineLayer/CommonParameters.cs
+++ b/MetaMorpheus/EngineLayer/CommonParameters.cs
@@ -34,6 +34,7 @@ public CommonParameters(
int totalPartitions = 1,
double qValueThreshold = 0.01,
double pepQValueThreshold = 1.0,
+ double qValueCutoffForPepCalculation = 0.005,
double scoreCutoff = 5,
int? numberOfPeaksToKeepPerWindow = 200,
double? minimumAllowedIntensityRatioToBasePeak = 0.01,
@@ -67,6 +68,7 @@ public CommonParameters(
TotalPartitions = totalPartitions;
QValueThreshold = qValueThreshold;
PepQValueThreshold = pepQValueThreshold;
+ QValueCutoffForPepCalculation = qValueCutoffForPepCalculation;
ScoreCutoff = scoreCutoff;
NumberOfPeaksToKeepPerWindow = numberOfPeaksToKeepPerWindow;
MinimumAllowedIntensityRatioToBasePeak = minimumAllowedIntensityRatioToBasePeak;
@@ -157,6 +159,11 @@ public int DeconvolutionMaxAssumedChargeState
///
public double PepQValueThreshold { get; private set; }
public double ScoreCutoff { get; private set; }
+ /// <summary>
+ /// This parameter determines which PSMs/Peptides will be used as positive training examples
+ /// when training the GBDT model for PEP.
+ /// </summary>
+ public double QValueCutoffForPepCalculation { get; private set; }
public DigestionParams DigestionParams { get; private set; }
public bool ReportAllAmbiguity { get; private set; }
public int? NumberOfPeaksToKeepPerWindow { get; private set; }
@@ -225,6 +232,7 @@ public CommonParameters CloneWithNewTerminus(FragmentationTerminus? terminus = n
TotalPartitions,
QValueThreshold,
PepQValueThreshold,
+ QValueCutoffForPepCalculation,
ScoreCutoff,
NumberOfPeaksToKeepPerWindow,
MinimumAllowedIntensityRatioToBasePeak,
diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
index a1781910a..00aae1d23 100644
--- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
+++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
@@ -54,36 +54,26 @@ public static string ComputePEPValuesForAllPSMsGeneric(List psms,
.Select(b => b.FirstOrDefault()).ToList();
List countOfPeptidesInEachFile = peptides.GroupBy(b => b.FullFilePath).Select(b => b.Count()).ToList();
bool allFilesContainPeptides = (countOfPeptidesInEachFile.Count == fileSpecificParameters.Count); //rare condition where each file has psms but some files don't have peptides. probably only happens in unit tests.
+ QValueCutoff = fileSpecificParameters.Select(t => t.fileSpecificParameters.QValueCutoffForPepCalculation).Min();
int chargeStateMode = 0;
- int numberOfPositiveTrainingExamples = 0;
Dictionary fileSpecificMedianFragmentMassErrors = new Dictionary();
- while (numberOfPositiveTrainingExamples < 10)
+ if (peptides.Count() > 100 && allFilesContainPeptides)
{
- if (peptides.Count() > 100 && allFilesContainPeptides)
- {
- foreach (var peptide in peptides)
- {
- allPeptideIndices.Add(psms.IndexOf(peptide));
- }
- chargeStateMode = GetChargeStateMode(peptides);
- fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides);
- numberOfPositiveTrainingExamples = peptides.Count(peptide => peptide.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff);
- }
- else
+ foreach (var peptide in peptides)
{
- //there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead.
- UsePeptideLevelQValueForTraining = false;
- numberOfPositiveTrainingExamples = psms.Count(psm => psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff);
- allPeptideIndices = Enumerable.Range(0, psms.Count).ToList();
- chargeStateMode = GetChargeStateMode(psms);
- fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms);
- }
-
- if (numberOfPositiveTrainingExamples < 10)
- {
- QValueCutoff = QValueCutoff * 2;
+ allPeptideIndices.Add(psms.IndexOf(peptide));
}
+ chargeStateMode = GetChargeStateMode(peptides);
+ fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides);
+ }
+ else
+ {
+ //there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead.
+ UsePeptideLevelQValueForTraining = false;
+ allPeptideIndices = Enumerable.Range(0, psms.Count).ToList();
+ chargeStateMode = GetChargeStateMode(psms);
+ fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms);
}
diff --git a/MetaMorpheus/Test/SpectralRecoveryTest.cs b/MetaMorpheus/Test/SpectralRecoveryTest.cs
index af746dad4..f72742896 100644
--- a/MetaMorpheus/Test/SpectralRecoveryTest.cs
+++ b/MetaMorpheus/Test/SpectralRecoveryTest.cs
@@ -49,14 +49,16 @@ public void SpectralRecoveryTestSetup()
string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\HumanFastaSlice.fasta");
proteinList = ProteinDbLoader.LoadProteinFasta(databasePath, true, DecoyType.Reverse, false, out List errors)
.Where(protein => protein.AppliedSequenceVariations != null).ToList();
+ CommonParameters commonParameters = new CommonParameters();
+
foreach (PsmFromTsv readPsm in tsvPsms.Where(psm => !psm.FullSequence.Contains('['))) // Modifications break the parser
{
string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory,
"TestData", "SpectralRecoveryTest", readPsm.FileNameWithoutExtension + ".mzML");
- MsDataScan scan = myFileManager.LoadFile(filePath, new CommonParameters()).GetOneBasedScan(readPsm.Ms2ScanNumber);
+ MsDataScan scan = myFileManager.LoadFile(filePath, commonParameters).GetOneBasedScan(readPsm.Ms2ScanNumber);
Ms2ScanWithSpecificMass ms2Scan = new Ms2ScanWithSpecificMass(scan, readPsm.PrecursorMz, readPsm.PrecursorCharge,
- filePath, new CommonParameters());
+ filePath, commonParameters);
Protein protein = proteinList.First(protein => protein.Accession == readPsm.ProteinAccession);
//string[] startAndEndResidues = readPsm.StartAndEndResiduesInProtein.Split(" ");
@@ -99,7 +101,7 @@ public void SpectralRecoveryTestSetup()
MassDiffAcceptorType = MassDiffAcceptorType.ThreeMM,
WriteHighQValuePsms = true
},
- CommonParameters = new CommonParameters()
+ CommonParameters = new CommonParameters(qValueCutoffForPepCalculation: 0.01)
};
searchTaskResults = searchTask.RunTask(outputFolder, databaseList, rawSlices, "name");
@@ -130,10 +132,10 @@ public void SpectralRecoveryTestSetup()
QuantifyPpmTol = 25
}
},
- CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect),
+ CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect, qValueCutoffForPepCalculation: 0.01),
FileSpecificParameters = new List<(string FileName, CommonParameters Parameters)> {
- (rawSlices[0], new CommonParameters()),
- (rawSlices[1], new CommonParameters())
+ (rawSlices[0], new CommonParameters(qValueCutoffForPepCalculation: 0.01)),
+ (rawSlices[1], new CommonParameters(qValueCutoffForPepCalculation: 0.01))
}
};