diff --git a/MetaMorpheus/CMD/CMD.csproj b/MetaMorpheus/CMD/CMD.csproj index 44eb33bbe..9a9f0d891 100644 --- a/MetaMorpheus/CMD/CMD.csproj +++ b/MetaMorpheus/CMD/CMD.csproj @@ -24,7 +24,7 @@ - + diff --git a/MetaMorpheus/EngineLayer/EngineLayer.csproj b/MetaMorpheus/EngineLayer/EngineLayer.csproj index 9c82f4ec2..600c08d90 100644 --- a/MetaMorpheus/EngineLayer/EngineLayer.csproj +++ b/MetaMorpheus/EngineLayer/EngineLayer.csproj @@ -21,7 +21,7 @@ - + diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/FdrInfo.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/FdrInfo.cs index 85acb5801..dc1327904 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/FdrInfo.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/FdrInfo.cs @@ -10,5 +10,16 @@ public class FdrInfo public double QValueNotch { get; set; } public double PEP { get; set; } public double PEP_QValue { get; set; } + + /// + /// Creates a new FdrInfo object where Q-Values and PEP_Qvalues are set to 2 by default + /// This is done to avoid situations where q-values aren't calculated for a given peptide, but it is still + /// reported in the final results.
+ /// + public FdrInfo() + { + QValue = 2; + PEP_QValue = 2; + } } } \ No newline at end of file diff --git a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs index f68cd3c0a..5425c3061 100644 --- a/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs +++ b/MetaMorpheus/EngineLayer/FdrAnalysis/PEPAnalysisEngine.cs @@ -22,6 +22,27 @@ namespace EngineLayer { public class PepAnalysisEngine { + private int _randomSeed = 42; + + /// + /// This property contains the hyper-parameters that will be used when training the machine learning model + /// + /// Options object to be passed in to the FastTree constructor + public Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options BGDTreeOptions => + new Microsoft.ML.Trainers.FastTree.FastTreeBinaryTrainer.Options + { + NumberOfThreads = 1, + NumberOfTrees = 400, + MinimumExampleCountPerLeaf = 10, + NumberOfLeaves = 20, + LearningRate = 0.2, + LabelColumnName = "Label", + FeatureColumnName = "Features", + Seed = _randomSeed, + FeatureSelectionSeed = _randomSeed, + RandomStart = false + }; + private static readonly double AbsoluteProbabilityThatDistinguishesPeptides = 0.05; //These two dictionaries contain the average and standard deviations of hydrophobicitys measured in 1 minute increments accross each raw @@ -105,10 +126,10 @@ public string ComputePEPValuesForAllPSMs() } } - MLContext mlContext = new MLContext(); + MLContext mlContext = new MLContext(seed: _randomSeed); TransformerChain>>[] trainedModels = new TransformerChain>>[numGroups]; - var trainer = mlContext.BinaryClassification.Trainers.FastTree(labelColumnName: "Label", featureColumnName: "Features", numberOfTrees: 400); + var trainer = mlContext.BinaryClassification.Trainers.FastTree(BGDTreeOptions); var pipeline = mlContext.Transforms.Concatenate("Features", TrainingVariables) .Append(trainer); diff --git a/MetaMorpheus/GUI/GUI.csproj b/MetaMorpheus/GUI/GUI.csproj index
df8cfd5df..b2912b0de 100644 --- a/MetaMorpheus/GUI/GUI.csproj +++ b/MetaMorpheus/GUI/GUI.csproj @@ -55,7 +55,7 @@ - + diff --git a/MetaMorpheus/GuiFunctions/GuiFunctions.csproj b/MetaMorpheus/GuiFunctions/GuiFunctions.csproj index e2989fbcb..1e5e122fe 100644 --- a/MetaMorpheus/GuiFunctions/GuiFunctions.csproj +++ b/MetaMorpheus/GuiFunctions/GuiFunctions.csproj @@ -16,7 +16,7 @@ - + diff --git a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs index 69e73a402..e30b4549a 100644 --- a/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs +++ b/MetaMorpheus/TaskLayer/SearchTask/PostSearchAnalysisTask.cs @@ -137,8 +137,8 @@ private void CalculatePsmAndPeptideFdr(List psms, string analysis // this could cause weird PSM FDR issues Status("Estimating PSM FDR...", Parameters.SearchTaskId); - new FdrAnalysisEngine(psms, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters, - new List { Parameters.SearchTaskId }, analysisType: analysisType, doPEP: doPep, outputFolder: Parameters.OutputFolder).Run(); + new FdrAnalysisEngine(psms, Parameters.NumNotches, CommonParameters, this.FileSpecificParameters, + new List { Parameters.SearchTaskId }, analysisType: analysisType, doPEP: doPep, outputFolder: Parameters.OutputFolder).Run(); Status("Done estimating PSM FDR!", Parameters.SearchTaskId); } diff --git a/MetaMorpheus/TaskLayer/TaskLayer.csproj b/MetaMorpheus/TaskLayer/TaskLayer.csproj index 35f063006..8b96a5be5 100644 --- a/MetaMorpheus/TaskLayer/TaskLayer.csproj +++ b/MetaMorpheus/TaskLayer/TaskLayer.csproj @@ -21,7 +21,7 @@ - + diff --git a/MetaMorpheus/Test/Test.csproj b/MetaMorpheus/Test/Test.csproj index a4d8b9f5c..e46489a22 100644 --- a/MetaMorpheus/Test/Test.csproj +++ b/MetaMorpheus/Test/Test.csproj @@ -24,7 +24,7 @@ - + diff --git a/MetaMorpheus/Test/XLTest.cs b/MetaMorpheus/Test/XLTest.cs index 5de9a1e65..f8a136ed7 100644 --- a/MetaMorpheus/Test/XLTest.cs +++ 
b/MetaMorpheus/Test/XLTest.cs @@ -561,8 +561,8 @@ public static void XlTest_MoreComprehensive() } } - Assert.AreEqual(47, inter); - Assert.AreEqual(73, intra); + Assert.AreEqual(53, inter); + Assert.AreEqual(81, intra); Assert.AreEqual(0, unnasignedCrossType);