Skip to content

Commit

Permalink
Merge branch 'master' into ShortreedPep3
Browse files Browse the repository at this point in the history
  • Loading branch information
Alexander-Sol authored Jul 29, 2024
2 parents 61762bc + 7dab370 commit eb69623
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 30 deletions.
55 changes: 45 additions & 10 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PEPValueAnalysisGeneric.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,23 @@ public static class PEP_Analysis_Cross_Validation
private static Dictionary<string, Dictionary<int, Tuple<double, double>>> fileSpecificTimeDependantHydrophobicityAverageAndDeviation_unmodified = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();
private static Dictionary<string, Dictionary<int, Tuple<double, double>>> fileSpecificTimeDependantHydrophobicityAverageAndDeviation_modified = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();
private static Dictionary<string, Dictionary<int, Tuple<double, double>>> fileSpecificTimeDependantHydrophobicityAverageAndDeviation_CZE = new Dictionary<string, Dictionary<int, Tuple<double, double>>>();


/// <summary>
/// A dictionary which stores the chimeric ID string as the key and the number of chimeric identifications as the value
/// </summary>
private static Dictionary<string, int> chimeraCountDictionary = new Dictionary<string, int>();
public static bool UsePeptideLevelQValueForTraining = true;
public static double QValueCutoff = 0.005;


/// <summary>
/// This method is used to compute the PEP values for all PSMs in a dataset.
/// </summary>
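/// <param name="psms">The spectral matches for which PEP values are computed</param>
/// <param name="searchType">Key into <see cref="PsmData.trainingInfos"/> that selects the training variables (e.g. "standard", "top-down" or "crosslink")</param>
/// <param name="fileSpecificParameters">Pairs of a file name and the <see cref="CommonParameters"/> specific to that file</param>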
/// <param name="outputFolder"></param>
/// <returns></returns>
public static string ComputePEPValuesForAllPSMsGeneric(List<SpectralMatch> psms, string searchType, List<(string fileName, CommonParameters fileSpecificParameters)> fileSpecificParameters, string outputFolder)
{
string[] trainingVariables = PsmData.trainingInfos[searchType];
Expand Down Expand Up @@ -100,6 +113,10 @@ public static string ComputePEPValuesForAllPSMsGeneric(List<SpectralMatch> psms,
}
}

if (trainingVariables.Contains("ChimeraCount"))
chimeraCountDictionary = psms.GroupBy(p => p.ChimeraIdString)
.ToDictionary(p => p.Key, p => p.Count());

MLContext mlContext = new MLContext();

//the number of groups used for cross-validation is hard-coded at four. Do not change this number without also changing the other affected areas of the code.
Expand Down Expand Up @@ -768,13 +785,20 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file
{
double normalizationFactor = selectedPeptide.BaseSequence.Length;
float totalMatchingFragmentCount = 0;
float internalMatchingFragmentCount = 0;
float intensity = 0;
float chargeDifference = 0;
float deltaScore = 0;
int notch = 0;
float ambiguity = 0;
float modCount = 0;
float absoluteFragmentMassError = 0;
float spectralAngle = 0;
float hasSpectralAngle = 0;
float chimeraCount = 0;
float peaksInPrecursorEnvelope = 0;
float mostAbundantPrecursorPeakIntensity = 0;
float fractionalIntensity = 0;

float missedCleavages = 0;
float longestSeq = 0;
Expand All @@ -791,19 +815,20 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file
float isLoop = 0;
float isInter = 0;
float isIntra = 0;
float spectralAngle = 0;
float hasSpectralAngle = 0;

double multiplier = 10;
if (searchType != "crosslink")
{
if (searchType == "top-down")
{
normalizationFactor /= 10.0;
normalizationFactor = 1.0;
}
totalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count / normalizationFactor * 10, 0));
intensity = (float)Math.Min(50, Math.Round((psm.Score - (int)psm.Score) / normalizationFactor * 100.0, 0));
// count only terminal fragment ions
totalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType == null) / normalizationFactor * multiplier, 0));
internalMatchingFragmentCount = (float)(Math.Round(psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide].Count(p => p.NeutralTheoreticalProduct.SecondaryProductType != null) / normalizationFactor * multiplier, 0));
intensity = (float)Math.Min(50, Math.Round((psm.Score - (int)psm.Score) / normalizationFactor * Math.Pow(multiplier, 2), 0));
chargeDifference = -Math.Abs(chargeStateMode - psm.ScanPrecursorCharge);
deltaScore = (float)Math.Round(psm.DeltaScore / normalizationFactor * 10.0, 0);
deltaScore = (float)Math.Round(psm.DeltaScore / normalizationFactor * multiplier, 0);
notch = notchToUse;
modCount = Math.Min((float)selectedPeptide.AllModsOneIsNterminus.Keys.Count(), 10);
if (psm.BioPolymersWithSetModsToMatchingFragments[selectedPeptide]?.Count() > 0)
Expand All @@ -812,10 +837,15 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file
}

ambiguity = Math.Min((float)(psm.BioPolymersWithSetModsToMatchingFragments.Keys.Count - 1), 10);
longestSeq = (float)Math.Round(SpectralMatch.GetLongestIonSeriesBidirectional(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * 10, 0);
complementaryIonCount = (float)Math.Round(SpectralMatch.GetCountComplementaryIons(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * 10, 0);
longestSeq = (float)Math.Round(SpectralMatch.GetLongestIonSeriesBidirectional(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0);
complementaryIonCount = (float)Math.Round(SpectralMatch.GetCountComplementaryIons(psm.BioPolymersWithSetModsToMatchingFragments, selectedPeptide) / normalizationFactor * multiplier, 0);
isVariantPeptide = PeptideIsVariant(selectedPeptide);
spectralAngle = (float)psm.SpectralAngle;
if (chimeraCountDictionary.TryGetValue(psm.ChimeraIdString, out int val))
chimeraCount = val;
peaksInPrecursorEnvelope = psm.PrecursorScanEnvelopePeakCount;
mostAbundantPrecursorPeakIntensity = (float)Math.Round((float)psm.PrecursorScanIntensity / normalizationFactor * multiplier, 0);
fractionalIntensity = (float)psm.PrecursorFractionalIntensity;

if (PsmHasSpectralAngle(psm))
{
Expand Down Expand Up @@ -927,7 +957,12 @@ public static PsmData CreateOnePsmDataEntry(string searchType, List<(string file
Label = label,

SpectralAngle = spectralAngle,
HasSpectralAngle = hasSpectralAngle
HasSpectralAngle = hasSpectralAngle,
PeaksInPrecursorEnvelope = peaksInPrecursorEnvelope,
ChimeraCount = chimeraCount,
MostAbundantPrecursorPeakIntensity = mostAbundantPrecursorPeakIntensity,
PrecursorFractionalIntensity = fractionalIntensity,
InternalIonCount = internalMatchingFragmentCount,
};

return psm.PsmData_forPEPandPercolator;
Expand Down
55 changes: 51 additions & 4 deletions MetaMorpheus/EngineLayer/FdrAnalysis/PsmData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,36 @@ public class PsmData
{
public static readonly IImmutableDictionary<string, string[]> trainingInfos = new Dictionary<string, string[]>
{
{ "standard", new [] { "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore", "IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle" } },
{ "top-down", new [] { "TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore", "Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "SpectralAngle", "HasSpectralAngle" } },
{ "crosslink", new [] { "TotalMatchingFragmentCount", "AbsoluteAverageFragmentMassErrorFromMedian", "PrecursorChargeDiffToMode", "DeltaScore", "AlphaIntensity", "BetaIntensity", "LongestFragmentIonSeries_Alpha", "LongestFragmentIonSeries_Beta", "IsInter", "IsIntra" } }
{
"standard", new[]
{
"TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore",
"Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "MissedCleavagesCount",
"Ambiguity", "LongestFragmentIonSeries", "ComplementaryIonCount", "HydrophobicityZScore",
"IsVariantPeptide", "IsDeadEnd", "IsLoop", "SpectralAngle", "HasSpectralAngle",
}
},

{
"top-down", new[]
{
"TotalMatchingFragmentCount", "Intensity", "PrecursorChargeDiffToMode", "DeltaScore",
"Notch", "ModsCount", "AbsoluteAverageFragmentMassErrorFromMedian", "Ambiguity",
"LongestFragmentIonSeries", "ComplementaryIonCount", "SpectralAngle",
"HasSpectralAngle", "PeaksInPrecursorEnvelope", "ChimeraCount",
"MostAbundantPrecursorPeakIntensity", "PrecursorFractionalIntensity", "InternalIonCount"
}
},

{
"crosslink",
new[]
{
"TotalMatchingFragmentCount", "AbsoluteAverageFragmentMassErrorFromMedian",
"PrecursorChargeDiffToMode", "DeltaScore", "AlphaIntensity", "BetaIntensity",
"LongestFragmentIonSeries_Alpha", "LongestFragmentIonSeries_Beta", "IsInter", "IsIntra"
}
}
}.ToImmutableDictionary();

/// <summary>
Expand Down Expand Up @@ -42,7 +69,12 @@ public class PsmData
{ "IsInter", -1 },
{ "IsIntra", -1 },
{ "SpectralAngle", 1 },
{ "HasSpectralAngle", 1 }
{ "HasSpectralAngle", 1 },
{ "PeaksInPrecursorEnvelope", 1 },
{ "ChimeraCount", -1 },
{ "MostAbundantPrecursorPeakIntensity", 1 },
{ "PrecursorFractionalIntensity", 1 },
{ "InternalIonCount", 1},
}.ToImmutableDictionary();

public string ToString(string searchType)
Expand Down Expand Up @@ -132,5 +164,20 @@ public string ToString(string searchType)

[LoadColumn(23)]
public float HasSpectralAngle { get; set; }

/// <summary>
/// Set in CreateOnePsmDataEntry from the match's PrecursorScanEnvelopePeakCount.
/// </summary>
[LoadColumn(24)]
public float PeaksInPrecursorEnvelope { get; set; }

/// <summary>
/// Number of spectral matches that share this match's ChimeraIdString, as looked up in CreateOnePsmDataEntry.
/// Stays 0 when no count is stored for that ID.
/// </summary>
[LoadColumn(25)]
public float ChimeraCount { get; set; }

/// <summary>
/// Set in CreateOnePsmDataEntry from the match's PrecursorScanIntensity, divided by the normalization factor,
/// multiplied by the multiplier and rounded to a whole number.
/// </summary>
[LoadColumn(26)]
public float MostAbundantPrecursorPeakIntensity { get; set; }

/// <summary>
/// Set in CreateOnePsmDataEntry from the match's PrecursorFractionalIntensity, cast to float.
/// </summary>
[LoadColumn(27)]
public float PrecursorFractionalIntensity { get; set; }

/// <summary>
/// Count of matched fragment ions whose NeutralTheoreticalProduct.SecondaryProductType is not null,
/// divided by the normalization factor, multiplied by the multiplier and rounded to a whole number
/// (computed in CreateOnePsmDataEntry).
/// </summary>
[LoadColumn(28)]
public float InternalIonCount { get; set; }
}
}
4 changes: 3 additions & 1 deletion MetaMorpheus/EngineLayer/Ms2ScanWithSpecificMass.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,14 @@ namespace EngineLayer
public class Ms2ScanWithSpecificMass
{
public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopicPeakMz, int precursorCharge, string fullFilePath, CommonParameters commonParam,
IsotopicEnvelope[] neutralExperimentalFragments = null, double? precursorIntensity = null, int? envelopePeakCount = null)
IsotopicEnvelope[] neutralExperimentalFragments = null, double? precursorIntensity = null, int? envelopePeakCount = null, double? precursorFractionalIntensity = null)
{
PrecursorMonoisotopicPeakMz = precursorMonoisotopicPeakMz;
PrecursorCharge = precursorCharge;
PrecursorMass = PrecursorMonoisotopicPeakMz.ToMass(precursorCharge);
PrecursorIntensity = precursorIntensity ?? 1;
PrecursorEnvelopePeakCount = envelopePeakCount ?? 1;
PrecursorFractionalIntensity = precursorFractionalIntensity ?? -1;
FullFilePath = fullFilePath;
ChildScans = new List<Ms2ScanWithSpecificMass>();
NativeId = mzLibScan.NativeId;
Expand All @@ -42,6 +43,7 @@ public Ms2ScanWithSpecificMass(MsDataScan mzLibScan, double precursorMonoisotopi
/// <summary>Precursor charge passed to the constructor.</summary>
public int PrecursorCharge { get; }
/// <summary>Precursor intensity passed to the constructor; 1 when none was supplied.</summary>
public double PrecursorIntensity { get; }
/// <summary>Envelope peak count passed to the constructor; 1 when none was supplied.</summary>
public int PrecursorEnvelopePeakCount { get; }
/// <summary>Precursor fractional intensity passed to the constructor; -1 when none was supplied.</summary>
public double PrecursorFractionalIntensity { get; }
/// <summary>File path passed to the constructor.</summary>
public string FullFilePath { get; }
public IsotopicEnvelope[] ExperimentalFragments { get; private set; }
/// <summary>Initialized to an empty list by the constructor.</summary>
public List<Ms2ScanWithSpecificMass> ChildScans { get; set; } // MS2/MS3 scans that are children of this MS2 scan
Expand Down
14 changes: 14 additions & 0 deletions MetaMorpheus/EngineLayer/SpectralMatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score,
ScanPrecursorMonoisotopicPeakMz = scan.PrecursorMonoisotopicPeakMz;
ScanPrecursorMass = scan.PrecursorMass;
PrecursorScanEnvelopePeakCount = scan.PrecursorEnvelopePeakCount;
PrecursorFractionalIntensity = scan.PrecursorFractionalIntensity;
DigestionParams = commonParameters.DigestionParams;
BioPolymersWithSetModsToMatchingFragments = new Dictionary<IBioPolymerWithSetMods, List<MatchedFragmentIon>>();
Xcorr = xcorr;
Expand Down Expand Up @@ -67,6 +68,7 @@ protected SpectralMatch(IBioPolymerWithSetMods peptide, int notch, double score,
/// <summary>Copied from scan.PrecursorMonoisotopicPeakMz in the constructor.</summary>
public double ScanPrecursorMonoisotopicPeakMz { get; }
public double PrecursorScanIntensity { get; }
/// <summary>Copied from scan.PrecursorEnvelopePeakCount in the constructor.</summary>
public int PrecursorScanEnvelopePeakCount { get; }
/// <summary>Copied from scan.PrecursorFractionalIntensity in the constructor.</summary>
public double PrecursorFractionalIntensity { get; }
/// <summary>Copied from scan.PrecursorMass in the constructor.</summary>
public double ScanPrecursorMass { get; }
public string FullFilePath { get; private set; }
public int ScanIndex { get; }
Expand Down Expand Up @@ -379,7 +381,17 @@ protected SpectralMatch(SpectralMatch psm, List<(int Notch, IBioPolymerWithSetMo

#endregion

#region FDR

// Backing field for ChimeraIdString; remains null until the property is first read.
private string _chimeraIdString;

/// <summary>
/// Identifier formed by concatenating ScanNumber, FullFilePath and PrecursorScanNumber.
/// The string is built on the first read and the cached value is returned afterwards.
/// </summary>
public string ChimeraIdString
{
    get
    {
        if (_chimeraIdString == null)
        {
            _chimeraIdString = $"{ScanNumber}{FullFilePath}{PrecursorScanNumber}";
        }

        return _chimeraIdString;
    }
}

/// <summary>
/// Returns an integer representing the longest continuous number of residues in the match covered on both sides by fragment ions
/// </summary>
/// <param name="PeptidesToMatchingFragments"></param>
/// <param name="peptide"></param>
/// <returns></returns>
public static int GetLongestIonSeriesBidirectional(Dictionary<IBioPolymerWithSetMods, List<MatchedFragmentIon>> PeptidesToMatchingFragments, IBioPolymerWithSetMods peptide)
{
List<int> maxDiffs = new List<int> { 1 };
Expand Down Expand Up @@ -531,6 +543,8 @@ public static int GetCountComplementaryIons(Dictionary<IBioPolymerWithSetMods, L
}
}

#endregion

/// <summary>
/// There are a few key locations in MetaMorpheus where we want to have psms sorted in a consistent manner.
/// These are for q-value determination and for when we write the psms to psmtsv.
Expand Down
Loading

0 comments on commit eb69623

Please sign in to comment.