Skip to content

Commit

Permalink
Added QValueThresholdForPEP to common params
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Sol committed Jul 29, 2024
1 parent eb69623 commit 95b3135
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 30 deletions.
8 changes: 8 additions & 0 deletions MetaMorpheus/EngineLayer/CommonParameters.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public CommonParameters(
int totalPartitions = 1,
double qValueThreshold = 0.01,
double pepQValueThreshold = 1.0,
double qValueCutoffForPepCalculation = 0.005,
double scoreCutoff = 5,
int? numberOfPeaksToKeepPerWindow = 200,
double? minimumAllowedIntensityRatioToBasePeak = 0.01,
Expand Down Expand Up @@ -67,6 +68,7 @@ public CommonParameters(
TotalPartitions = totalPartitions;
QValueThreshold = qValueThreshold;
PepQValueThreshold = pepQValueThreshold;
QValueCutoffForPepCalculation = qValueCutoffForPepCalculation;
ScoreCutoff = scoreCutoff;
NumberOfPeaksToKeepPerWindow = numberOfPeaksToKeepPerWindow;
MinimumAllowedIntensityRatioToBasePeak = minimumAllowedIntensityRatioToBasePeak;
Expand Down Expand Up @@ -157,6 +159,11 @@ public int DeconvolutionMaxAssumedChargeState
/// </summary>
public double PepQValueThreshold { get; private set; }
public double ScoreCutoff { get; private set; }
/// <summary>
/// This parameter determines which PSMs/Peptides will be used as positive training examples
/// when training the GBDT model for PEP.
/// </summary>
public double QValueCutoffForPepCalculation { get; private set; }
public DigestionParams DigestionParams { get; private set; }
public bool ReportAllAmbiguity { get; private set; }
public int? NumberOfPeaksToKeepPerWindow { get; private set; }
Expand Down Expand Up @@ -225,6 +232,7 @@ public CommonParameters CloneWithNewTerminus(FragmentationTerminus? terminus = n
TotalPartitions,
QValueThreshold,
PepQValueThreshold,
QValueCutoffForPepCalculation,
ScoreCutoff,
NumberOfPeaksToKeepPerWindow,
MinimumAllowedIntensityRatioToBasePeak,
Expand Down
38 changes: 14 additions & 24 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
Original file line number Diff line number Diff line change
Expand Up @@ -54,36 +54,26 @@ public static string ComputePEPValuesForAllPSMsGeneric(List<SpectralMatch> psms,
.Select(b => b.FirstOrDefault()).ToList();
List<int> countOfPeptidesInEachFile = peptides.GroupBy(b => b.FullFilePath).Select(b => b.Count()).ToList();
bool allFilesContainPeptides = (countOfPeptidesInEachFile.Count == fileSpecificParameters.Count); //rare condition where each file has psms but some files don't have peptides. probably only happens in unit tests.
QValueCutoff = fileSpecificParameters.Select(t => t.fileSpecificParameters.QValueCutoffForPepCalculation).Min();

int chargeStateMode = 0;
int numberOfPositiveTrainingExamples = 0;
Dictionary<string, float> fileSpecificMedianFragmentMassErrors = new Dictionary<string, float>();
while (numberOfPositiveTrainingExamples < 10)
if (peptides.Count() > 100 && allFilesContainPeptides)
{
if (peptides.Count() > 100 && allFilesContainPeptides)
{
foreach (var peptide in peptides)
{
allPeptideIndices.Add(psms.IndexOf(peptide));
}
chargeStateMode = GetChargeStateMode(peptides);
fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides);
numberOfPositiveTrainingExamples = peptides.Count(peptide => peptide.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff);
}
else
foreach (var peptide in peptides)
{
//there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead.
UsePeptideLevelQValueForTraining = false;
numberOfPositiveTrainingExamples = psms.Count(psm => psm.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= QValueCutoff);
allPeptideIndices = Enumerable.Range(0, psms.Count).ToList();
chargeStateMode = GetChargeStateMode(psms);
fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms);
}

if (numberOfPositiveTrainingExamples < 10)
{
QValueCutoff = QValueCutoff * 2;
allPeptideIndices.Add(psms.IndexOf(peptide));
}
chargeStateMode = GetChargeStateMode(peptides);
fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(peptides);
}
else
{
//there are too few psms to do any meaningful training if we used only peptides. So, we will train using psms instead.
UsePeptideLevelQValueForTraining = false;
allPeptideIndices = Enumerable.Range(0, psms.Count).ToList();
chargeStateMode = GetChargeStateMode(psms);
fileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(psms);
}


Expand Down
14 changes: 8 additions & 6 deletions MetaMorpheus/Test/SpectralRecoveryTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,16 @@ public void SpectralRecoveryTestSetup()
string databasePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "TestData", @"SpectralRecoveryTest\HumanFastaSlice.fasta");
proteinList = ProteinDbLoader.LoadProteinFasta(databasePath, true, DecoyType.Reverse, false, out List<string> errors)
.Where(protein => protein.AppliedSequenceVariations != null).ToList();
CommonParameters commonParameters = new CommonParameters();


foreach (PsmFromTsv readPsm in tsvPsms.Where(psm => !psm.FullSequence.Contains('['))) // Modifications break the parser
{
string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory,
"TestData", "SpectralRecoveryTest", readPsm.FileNameWithoutExtension + ".mzML");
MsDataScan scan = myFileManager.LoadFile(filePath, new CommonParameters()).GetOneBasedScan(readPsm.Ms2ScanNumber);
MsDataScan scan = myFileManager.LoadFile(filePath, commonParameters).GetOneBasedScan(readPsm.Ms2ScanNumber);
Ms2ScanWithSpecificMass ms2Scan = new Ms2ScanWithSpecificMass(scan, readPsm.PrecursorMz, readPsm.PrecursorCharge,
filePath, new CommonParameters());
filePath, commonParameters);
Protein protein = proteinList.First(protein => protein.Accession == readPsm.ProteinAccession);

//string[] startAndEndResidues = readPsm.StartAndEndResiduesInProtein.Split(" ");
Expand Down Expand Up @@ -99,7 +101,7 @@ public void SpectralRecoveryTestSetup()
MassDiffAcceptorType = MassDiffAcceptorType.ThreeMM,
WriteHighQValuePsms = true
},
CommonParameters = new CommonParameters()
CommonParameters = new CommonParameters(qValueCutoffForPepCalculation: 0.01)
};
searchTaskResults = searchTask.RunTask(outputFolder, databaseList, rawSlices, "name");

Expand Down Expand Up @@ -130,10 +132,10 @@ public void SpectralRecoveryTestSetup()
QuantifyPpmTol = 25
}
},
CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect),
CommonParameters = new CommonParameters(dissociationType: DissociationType.Autodetect, qValueCutoffForPepCalculation: 0.01),
FileSpecificParameters = new List<(string FileName, CommonParameters Parameters)> {
(rawSlices[0], new CommonParameters()),
(rawSlices[1], new CommonParameters())
(rawSlices[0], new CommonParameters(qValueCutoffForPepCalculation: 0.01)),
(rawSlices[1], new CommonParameters(qValueCutoffForPepCalculation: 0.01))
}
};

Expand Down

0 comments on commit 95b3135

Please sign in to comment.