diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs index 68e1d58d5..794117c6a 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/FdrAnalysisEngine.cs @@ -314,7 +314,8 @@ public void Compute_PEPValue(FdrAnalysisResults myAnalysisResults, List /// This method contains the hyper-parameters that will be used when training the machine learning model /// /// Options object to be passed in to the FastTree constructor - public Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options BGDTreeOptions => + public static Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options BGDTreeOptions => new Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options { NumberOfThreads = 1, @@ -54,24 +54,24 @@ public class PepAnalysisEngine //The value of the dictionary is another dictionary that profiles the hydrophobicity behavior. //Each key is a retention time rounded to the nearest minute. //The value Tuple is the average and standard deviation, respectively, of the predicted hydrophobicities of the observed peptides eluting at that rounded retention time. 
- public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_unmodified { get; private set; } - public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified { get; private set; } - public Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE { get; private set; } + public static Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_unmodified { get; private set; } + public static Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified { get; private set; } + public static Dictionary>> FileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE { get; private set; } /// /// A dictionary which stores the chimeric ID string as the key and the number of chimeric identifications as the value /// - private Dictionary chimeraCountDictionary = new Dictionary(); - public Dictionary FileSpecificMedianFragmentMassErrors { get; private set; } - public Dictionary FileSpecificParametersDictionary { get; private set; } - public int ChargeStateMode { get; private set; } - - public double QValueCutoff { get; } - public bool UsePeptideLevelQValueForTraining = true; - public string[] TrainingVariables { get; } - public string OutputFolder { get; } - public List AllPsms { get; } - public string SearchType { get; } + private static Dictionary chimeraCountDictionary = new Dictionary(); + public static Dictionary FileSpecificMedianFragmentMassErrors { get; private set; } + public static Dictionary FileSpecificParametersDictionary { get; private set; } + public static int ChargeStateMode { get; private set; } + + public static double QValueCutoff { get; private set; } + public static bool UsePeptideLevelQValueForTraining = true; + public static string[] TrainingVariables { get; private set; } + public static string OutputFolder { get; private set; } + public static List AllPsms { get; private set; } + public static string SearchType { get; private set; } /// /// This method is 
used to compute the PEP values for all PSMs in a dataset. @@ -81,12 +81,28 @@ public class PepAnalysisEngine /// /// /// - public void SetFileSpecificParameters(List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters) + public static void SetFileSpecificParameters(List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters) { FileSpecificParametersDictionary = fileSpecificParameters.ToDictionary(p => Path.GetFileName(p.fileName), p => p.fileSpecificParameters); } - public PepAnalysisEngine(List psms, string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string outputFolder) + //public PepAnalysisEngine(List psms, string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string outputFolder) + //{ + // // This creates a new list of PSMs, but does not clone the Psms themselves. + // // This allows the PSMs to be modified and the order to be preserved + // AllPsms = psms.OrderByDescending(p => p).ToList(); + // TrainingVariables = PsmData.trainingInfos[searchType]; + // OutputFolder = outputFolder; + // SearchType = searchType; + // SetFileSpecificParameters(fileSpecificParameters); + // BuildFileSpecificDictionaries(psms, TrainingVariables); + // QValueCutoff = Math.Max(fileSpecificParameters.Select(t => t.fileSpecificParameters.QValueCutoffForPepCalculation).Min(), 0.005); + + // // If we have more than 100 peptides, we will train on the peptide level. Otherwise, we will train on the PSM level + // UsePeptideLevelQValueForTraining = psms.Select(psm => psm.FullSequence).Distinct().Count(seq => seq.IsNotNullOrEmpty()) >= 100; + //} + + public static void SetVariables(List psms, string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string outputFolder) { // This creates a new list of PSMs, but does not clone the Psms themselves. 
// This allows the PSMs to be modified and the order to be preserved @@ -102,7 +118,7 @@ public PepAnalysisEngine(List psms, string searchType, List<(stri UsePeptideLevelQValueForTraining = psms.Select(psm => psm.FullSequence).Distinct().Count(seq => seq.IsNotNullOrEmpty()) >= 100; } - public string ComputePEPValuesForAllPSMs() + public static string ComputePEPValuesForAllPSMs() { //List peptideGroups = UsePeptideLevelQValueForTraining // ? PeptideMatchGroup.GroupByBaseSequence(AllPsms) @@ -171,7 +187,7 @@ public string ComputePEPValuesForAllPSMs() /// /// The PSMs that will be used for training /// An array of training variables from PsmData.trainingInfos dictionary - public void BuildFileSpecificDictionaries(List trainingData, string[] trainingVariables) + public static void BuildFileSpecificDictionaries(List trainingData, string[] trainingVariables) { FileSpecificMedianFragmentMassErrors = GetFileSpecificMedianFragmentMassError(trainingData); ChargeStateMode = GetChargeStateMode(trainingData); @@ -242,7 +258,7 @@ static List> DivideListIntoGroups(List list, int numGroups) return groups; } - public IEnumerable CreatePsmData(string searchType, + public static IEnumerable CreatePsmData(string searchType, List peptideGroups, List peptideGroupIndices) { object psmDataListLock = new object(); @@ -387,7 +403,7 @@ public static string AggregateMetricsForOutput(List peptideGroups, + public static int Compute_PSM_PEP(List peptideGroups, List peptideGroupIndices, MLContext mLContext, TransformerChain>> trainedModel, string searchType, string outputFolder) { @@ -469,7 +485,7 @@ public int Compute_PSM_PEP(List peptideGroups, return ambiguousPeptidesResolved; } - public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide, int notchToUse, bool label) + public static PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide, int notchToUse, bool label) { double 
normalizationFactor = selectedPeptide.BaseSequence.Length; float totalMatchingFragmentCount = 0; @@ -530,11 +546,11 @@ public PsmData CreateOnePsmDataEntry(string searchType, SpectralMatch psm, IBioP complementaryIonCount = (float)Math.Round(SpectralMatch.GetCountComplementaryIons(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0); isVariantPeptide = PeptideIsVariant(selectedPeptide); spectralAngle = (float)psm.SpectralAngle; - if (chimeraCountDictionary.TryGetValue(psm.ChimeraIdString, out int val)) - chimeraCount = val; - peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; - mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); - fractionalIntensity = (float)psm.PrecursorFractionalIntensity; + //if (chimeraCountDictionary.TryGetValue(psm.ChimeraIdString, out int val)) + // chimeraCount = val; + //peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount; + //mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0); + //fractionalIntensity = (float)psm.PrecursorFractionalIntensity; if (PsmHasSpectralAngle(psm)) { @@ -692,12 +708,12 @@ public static void GetIndiciesOfPeptidesToRemove(List indiciesOfPeptidesToR /// /// Here we're getting the most common charge state for precursors that are Targets with q<=0.01. 
- public int GetChargeStateMode(List psms) + public static int GetChargeStateMode(List psms) { return psms.Where(p => p.IsDecoy != true && p.GetFdrInfo(UsePeptideLevelQValueForTraining).QValue <= 0.01).Select(p => p.ScanPrecursorCharge).GroupBy(n => n).OrderByDescending(g => g.Count()).Select(g => g.Key).FirstOrDefault(); } - public Dictionary>> ComputeHydrophobicityValues(List psms, bool computeHydrophobicitiesforModifiedPeptides) + public static Dictionary>> ComputeHydrophobicityValues(List psms, bool computeHydrophobicitiesforModifiedPeptides) { SSRCalc3 calc = new SSRCalc3("SSRCalc 3.0 (300A)", SSRCalc3.Column.A300); @@ -802,7 +818,7 @@ public Dictionary>> ComputeHydroph return rtHydrophobicityAvgDev; } - public Dictionary>> ComputeMobilityValues(List psms) + public static Dictionary>> ComputeMobilityValues(List psms) { Dictionary>> rtMobilityAvgDev = new Dictionary>>(); @@ -939,7 +955,7 @@ private static float GetSSRCalcHydrophobicityZScore(SpectralMatch psm, IBioPolym return (float)hydrophobicityZscore; } - private float GetMobilityZScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) + private static float GetMobilityZScore(SpectralMatch psm, IBioPolymerWithSetMods selectedPeptide) { double mobilityZScore = double.NaN;