Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for crux output #774

Merged
merged 8 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions mzLib/MzLibUtil/ClassExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,15 @@ public static bool AllSame<T>(this IEnumerable<T> list)
return true;
}

/// <summary>
/// Convenience extension so a path string can call
/// <c>PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension</c>
/// directly, e.g. <c>path.GetPeriodTolerantFilenameWithoutExtension()</c>.
/// </summary>
/// <param name="filePath">File name or path, forwarded unchanged to the underlying helper.</param>
/// <returns>The value the underlying helper produces for <paramref name="filePath"/>.</returns>
public static string GetPeriodTolerantFilenameWithoutExtension(this string filePath)
    => PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filePath);

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using System.Globalization;
using System.Text;
using CsvHelper.Configuration;
using CsvHelper.Configuration.Attributes;
using MzLibUtil;

namespace Readers
{
    /// <summary>
    /// One record of a Crux tab-delimited results file. Every mapped property is bound to
    /// its column by header text through the <c>[Name]</c> attribute; members marked
    /// <c>[Ignore]</c> are derived values and are not read from or written to the file.
    /// </summary>
    public class CruxResult
    {
        /// <summary>
        /// CsvHelper settings for Crux files: invariant culture, UTF-8, a header record and
        /// tab as the delimiter. A new configuration instance is created on every access.
        /// </summary>
        public static CsvConfiguration CsvConfiguration => new CsvConfiguration(CultureInfo.InvariantCulture)
        {
            Encoding = Encoding.UTF8,
            HasHeaderRecord = true,
            Delimiter = "\t",
        };

        [Name("file")]
        public string FilePath { get; set; }

        [Name("scan")]
        public int OneBasedScanNumber { get; set; }

        [Name("charge")]
        public int Charge { get; set; }

        [Name("retention time")]
        public double RetentionTime { get; set; }

        [Name("spectrum precursor m/z")]
        public double PrecursorMz { get; set; }

        [Name("spectrum neutral mass")]
        public double NeutralMass { get; set; }

        [Name("peptide mass")]
        public double PeptideMass { get; set; }

        [Name("delta_cn")]
        public double DeltaCn { get; set; }

        [Name("xcorr score")]
        public double XCorrScore { get; set; }

        [Name("xcorr rank")]
        public int XCorrRank { get; set; }

        [Name("tailor score")]
        public double TailorScore { get; set; }

        [Name("tdc q-value")]
        public double TdcQValue { get; set; }

        [Name("b/y ions matched")]
        public int BAndYIonsMatched { get; set; }

        [Name("b/y ions total")]
        public int BAndYIonsTotal { get; set; }

        [Name("b/y ions fraction")]
        public double BAndYIonsFraction { get; set; }

        [Name("b/y ion repeat match")]
        public int BAndYIonRepeatMatch { get; set; }

        [Name("distinct matches/spectrum")]
        public int DistinctMatchesPerSpectrum { get; set; }

        [Name("sequence")]
        public string FullSequence { get; set; }

        [Name("unmodified sequence")]
        public string BaseSequence { get; set; }

        [Name("protein id")]
        public string ProteinId { get; set; }

        [Name("flanking aa")]
        public string FlankingAa { get; set; }

        #region Interpreted properties

        [Ignore] private string? _fileNameWithoutExtension = null;

        /// <summary>
        /// <see cref="FilePath"/> passed through <c>GetPeriodTolerantFilenameWithoutExtension</c>.
        /// Computed on first access and cached.
        /// </summary>
        [Ignore] public string FileNameWithoutExtension => _fileNameWithoutExtension ??= FilePath.GetPeriodTolerantFilenameWithoutExtension();

        [Ignore] private string? _accession = null;

        /// <summary>
        /// Accession extracted from <see cref="ProteinId"/>. Computed on first access and cached.
        /// For a pipe-delimited id such as <c>sp|P67809|YBOX1_HUMAN(205)</c> this is the second
        /// field (<c>P67809</c>); an id with no <c>|</c> is returned trimmed, and a null or
        /// empty id yields an empty string.
        /// </summary>
        [Ignore] public string Accession => _accession ??= ParseAccession(ProteinId);

        /// <summary>
        /// Pulls the accession out of a protein id without throwing on ids that are not pipe-delimited.
        /// </summary>
        private static string ParseAccession(string proteinId)
        {
            if (string.IsNullOrEmpty(proteinId))
            {
                return string.Empty;
            }

            string[] fields = proteinId.Split('|');

            // Indexing [1] unconditionally throws for ids that carry no '|' separator,
            // so fall back to the whole id in that case.
            return fields.Length > 1 ? fields[1].Trim() : proteinId.Trim();
        }

        #endregion
    }
}
35 changes: 35 additions & 0 deletions mzLib/Readers/ExternalResults/ResultFiles/CruxResultFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

namespace Readers
{
    /// <summary>
    /// Result file wrapper for Crux output: reads and writes <see cref="CruxResult"/> records
    /// using <see cref="CruxResult.CsvConfiguration"/>.
    /// </summary>
    public class CruxResultFile : ResultFile<CruxResult>, IResultFile
    {
        public override SupportedFileType FileType => SupportedFileType.CruxResult;
        public override Software Software { get; set; }

        /// <summary>Creates a file object for <paramref name="filePath"/>, tagged as Crux output.</summary>
        public CruxResultFile(string filePath) : base(filePath, Software.Crux) { }

        public CruxResultFile() : base() { }

        /// <summary>
        /// Reads every record from <c>FilePath</c> into <c>Results</c>.
        /// </summary>
        public override void LoadResults()
        {
            using var csv = new CsvHelper.CsvReader(new StreamReader(FilePath), CruxResult.CsvConfiguration);
            Results = csv.GetRecords<CruxResult>().ToList();
        }

        /// <summary>
        /// Writes the header followed by every entry of <c>Results</c> to <paramref name="outputPath"/>.
        /// </summary>
        /// <param name="outputPath">
        /// Destination path. When it is a bare name (not accepted by <c>CanRead</c> and carrying
        /// no extension of its own) the default extension for <see cref="FileType"/> is appended.
        /// A path that already has an extension is written exactly as given.
        /// </param>
        public override void WriteResults(string outputPath)
        {
            // The decision must be based on the destination, not on the file this
            // instance was loaded from; testing FilePath meant the extension was never
            // appended for instances read from a recognised Crux file.
            if (!CanRead(outputPath) && !Path.HasExtension(outputPath))
            {
                outputPath += FileType.GetFileExtension();
            }

            using (var csv = new CsvHelper.CsvWriter(new StreamWriter(File.Create(outputPath)), CruxResult.CsvConfiguration))
            {
                csv.WriteHeader<CruxResult>();
                foreach (var result in Results)
                {
                    // NextRecord first: it terminates the header (or previous) row.
                    csv.NextRecord();
                    csv.WriteRecord(result);
                }
            }
        }
    }
}
1 change: 1 addition & 0 deletions mzLib/Readers/Util/Software.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ public enum Software
MaxQuant,
Toppic,
MsFragger, // files tested were from fragpipe v21.1
Crux
}
}
7 changes: 7 additions & 0 deletions mzLib/Readers/Util/SupportedFileTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
MsFraggerPsm,
MsFraggerPeptide,
MsFraggerProtein,
CruxResult
}

public static class SupportedFileTypeExtensions
Expand Down Expand Up @@ -54,6 +55,7 @@
SupportedFileType.MsFraggerPsm => "psm.tsv",
SupportedFileType.MsFraggerPeptide => "peptide.tsv",
SupportedFileType.MsFraggerProtein => "protein.tsv",
SupportedFileType.CruxResult => ".txt",
_ => throw new MzLibException("File type not supported")
};
}
Expand Down Expand Up @@ -111,6 +113,11 @@
throw new MzLibException("Tsv file type not supported");
}

case ".txt":
if (filePath.EndsWith(SupportedFileType.CruxResult.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase))
return SupportedFileType.CruxResult;
throw new MzLibException("Txt file type not supported");

Check warning on line 119 in mzLib/Readers/Util/SupportedFileTypes.cs

View check run for this annotation

Codecov / codecov/patch

mzLib/Readers/Util/SupportedFileTypes.cs#L119

Added line #L119 was not covered by tests

default:
throw new MzLibException("File type not supported");
}
Expand Down
15 changes: 15 additions & 0 deletions mzLib/Test/FileReadingTests/ExternalFileTypes/crux.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
file scan charge retention time spectrum precursor m/z spectrum neutral mass peptide mass delta_cn xcorr score xcorr rank tailor score tdc q-value b/y ions matched b/y ions total b/y ions fraction b/y ion repeat match distinct matches/spectrum sequence unmodified sequence protein id flanking aa
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 14674 3 2747.6599 1075.1815 3222.5227 3222.5222 0.84335566 6.4364114 1 1.9659604 3.8850189e-06 51 116 0.43965518 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 15417 3 2814.6499 1075.182 3222.5242 3222.5222 0.86036599 6.3186069 1 1.9550625 3.8850189e-06 48 116 0.41379309 3 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_18_161103_B4_HCD_OT_4ul.raw.mzXML 6847 4 2012.87 918.185 3668.7109 3668.7124 0.83817238 6.7191076 1 1.9478002 3.8850189e-06 53 191 0.27748692 0 64 AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK sp|Q9UKY7|CDV3_HUMAN(28) RA
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 74906 3 8146.6001 1004.5292 3010.5659 3010.5623 0.86094695 6.1447253 1 1.9289217 3.8850189e-06 39 116 0.33620691 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL
/hdd/data/PXD005590/B02_22_161103_D1_HCD_OT_4ul.raw.mzXML 65300 3 7277.5698 867.7704 2600.2896 2600.2869 0.86649311 6.2026334 1 1.9265088 3.8850189e-06 37 96 0.38541666 0 160 NHDTGVSPVFAGGVEYAITPEIATR NHDTGVSPVFAGGVEYAITPEIATR sp|P0A910|OMPA_ECOLI(135) KL
/hdd/data/PXD005590/B02_11_161103_D2_HCD_OT_4ul.raw.mzXML 32062 4 4347.98 668.6035 2670.3848 2670.3875 0.82502377 6.3233223 1 1.9088538 3.8850189e-06 41 95 0.43157896 0 102 EEHEVAVLGAPHNPAPPTSTVIHIR EEHEVAVLGAPHNPAPPTSTVIHIR sp|Q01628|IFM3_HUMAN(25) KS
/hdd/data/PXD005590/B02_16_161103_A3_HCD_OT_4ul.raw.mzXML 51309 4 6030.9102 884.4734 3533.8647 3533.8586 0.86957496 6.0390177 1 1.9073439 3.8850189e-06 35 139 0.25179857 0 26 AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR sp|P46937|YAP1_HUMAN(125) RQ
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 59327 4 6765.9102 771.1469 3080.5586 3080.561 0.81879568 6.7819619 1 1.9058784 3.8850189e-06 43 127 0.33858269 0 82 GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR sp|Q9BTU6|P4K2A_HUMAN(67) RE
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 4435 3 1815.64 751.6635 2251.9685 2251.9666 0.84570491 6.0909443 1 1.9016477 3.8850189e-06 38 92 0.41304347 0 112 APKPDGPGGGPGGSHMGGNYGDDR APKPDGPGGGPGGSHMGGNYGDDR sp|P35637|FUS_HUMAN(449) KR
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 19161 3 3172.6001 827.3565 2479.0476 2479.0457 0.86390048 5.704639 1 1.8994089 3.8850189e-06 38 92 0.41304347 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS
/hdd/data/PXD005590/B02_24_161103_C1_HCD_OT_4ul.raw.mzXML 58893 2 6700.6001 1396.1667 2790.3188 2790.3218 0.85683089 5.7747865 1 1.897424 3.8850189e-06 31 52 0.59615386 3 134 HTGPGILSMANAGPNTNGSQFFICTAK HTGPGILSMANAGPNTNGSQFFICTAK sp|P62937|PPIA_HUMAN(92) KT
/hdd/data/PXD005590/B02_001_161103_B1_HCD_OT_4ul.raw.mzXML 17264 3 2921.5601 1075.1821 3222.5247 3222.5222 0.81186515 5.8244729 1 1.8960458 3.8850189e-06 49 116 0.4224138 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_19_161103_C4_HCD_OT_4ul.raw.mzXML 72508 3 7969.2202 1004.5298 3010.5674 3010.5623 0.85636955 5.7181306 1 1.894421 3.8850189e-06 36 116 0.31034482 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 19752 3 3220.75 827.3577 2479.0515 2479.0457 0.85362118 5.682076 1 1.8917845 3.8850189e-06 32 92 0.34782609 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS
145 changes: 145 additions & 0 deletions mzLib/Test/FileReadingTests/TestCruxReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
using NUnit.Framework;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using Newtonsoft.Json;
using Readers;

namespace Test.FileReadingTests
{
    /// <summary>
    /// Tests for reading and writing Crux result files through <see cref="CruxResultFile"/>.
    /// </summary>
    [TestFixture]
    [ExcludeFromCodeCoverage]
    internal class TestCruxReader
    {
        // Scratch directory for files produced by the write test; created once and removed after the fixture.
        private static string directoryPath;

        [OneTimeSetUp]
        public void SetUp()
        {
            directoryPath = Path.Combine(TestContext.CurrentContext.TestDirectory,
                @"FileReadingTests\ReadingWritingTests");
            Directory.CreateDirectory(directoryPath);
        }

        [OneTimeTearDown]
        public void TearDown()
        {
            Directory.Delete(directoryPath, true);
        }

        [Test]
        [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
        public void TestCruxResultsLoadsAndCountCorrect(string path, int recordCount)
        {
            string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
            CruxResultFile file = new CruxResultFile(filePath);
            Assert.That(file.Count(), Is.EqualTo(recordCount));
            Assert.That(file.CanRead(path));
        }

        [Test]
        [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
        public static void TestCruxResultsFromGenericReader(string path, int recordCount)
        {
            string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
            var constructedFile = new CruxResultFile(filePath);
            var genericFile = FileReader.ReadFile<CruxResultFile>(filePath);

            Assert.That(genericFile.Count(), Is.EqualTo(recordCount));
            Assert.That(genericFile.Count(), Is.EqualTo(constructedFile.Count()));
            Assert.That(genericFile.FilePath, Is.EqualTo(constructedFile.FilePath));
        }

        [Test]
        [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
        public void TestCruxResultsFirstAndLastAreCorrect(string path)
        {
            string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
            var file = new CruxResultFile(filePath);

            var first = file.First();
            var last = file.Last();

            Assert.That(first.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML"));
            Assert.That(first.OneBasedScanNumber, Is.EqualTo(14674));
            Assert.That(first.Charge, Is.EqualTo(3));
            Assert.That(first.RetentionTime, Is.EqualTo(2747.6599));
            Assert.That(first.PrecursorMz, Is.EqualTo(1075.1815));
            Assert.That(first.NeutralMass, Is.EqualTo(3222.5227));
            Assert.That(first.PeptideMass, Is.EqualTo(3222.5222));
            Assert.That(first.DeltaCn, Is.EqualTo(0.84335566));
            Assert.That(first.XCorrScore, Is.EqualTo(6.4364114));
            Assert.That(first.XCorrRank, Is.EqualTo(1));
            Assert.That(first.TailorScore, Is.EqualTo(1.9659604));
            // The q-value is ~3.9e-6, so the tolerance has to be far below that to be meaningful.
            Assert.That(first.TdcQValue, Is.EqualTo(0.0000038850189).Within(1E-12));
            Assert.That(first.BAndYIonsMatched, Is.EqualTo(51));
            Assert.That(first.BAndYIonsTotal, Is.EqualTo(116));
            Assert.That(first.BAndYIonsFraction, Is.EqualTo(0.43965518));
            Assert.That(first.BAndYIonRepeatMatch, Is.EqualTo(0));
            Assert.That(first.DistinctMatchesPerSpectrum, Is.EqualTo(68));
            Assert.That(first.BaseSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
            Assert.That(first.FullSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
            Assert.That(first.ProteinId, Is.EqualTo("sp|P67809|YBOX1_HUMAN(205)"));
            Assert.That(first.FlankingAa, Is.EqualTo("RQ"));
            Assert.That(first.FileNameWithoutExtension, Is.EqualTo("B02_21_161103_D4_HCD_OT_4ul.raw"));
            Assert.That(first.Accession, Is.EqualTo("P67809"));

            Assert.That(last.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML"));
            Assert.That(last.OneBasedScanNumber, Is.EqualTo(19752));
            Assert.That(last.Charge, Is.EqualTo(3));
            Assert.That(last.RetentionTime, Is.EqualTo(3220.75));
            Assert.That(last.PrecursorMz, Is.EqualTo(827.3577));
            Assert.That(last.NeutralMass, Is.EqualTo(2479.0515));
            Assert.That(last.PeptideMass, Is.EqualTo(2479.0457));
            Assert.That(last.DeltaCn, Is.EqualTo(0.85362118));
            Assert.That(last.XCorrScore, Is.EqualTo(5.682076));
            Assert.That(last.XCorrRank, Is.EqualTo(1));
            Assert.That(last.TailorScore, Is.EqualTo(1.8917845));
            Assert.That(last.TdcQValue, Is.EqualTo(0.0000038850189).Within(1E-12));
            Assert.That(last.BAndYIonsMatched, Is.EqualTo(32));
            Assert.That(last.BAndYIonsTotal, Is.EqualTo(92));
            Assert.That(last.BAndYIonsFraction, Is.EqualTo(0.34782609));
            Assert.That(last.BAndYIonRepeatMatch, Is.EqualTo(0));
            Assert.That(last.DistinctMatchesPerSpectrum, Is.EqualTo(52));
            Assert.That(last.BaseSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
            Assert.That(last.FullSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
            Assert.That(last.ProteinId, Is.EqualTo("sp|Q01844|EWS_HUMAN(269)"));
            Assert.That(last.FlankingAa, Is.EqualTo("RS"));
            Assert.That(last.FileNameWithoutExtension, Is.EqualTo("B02_20_161103_E4_HCD_OT_4ul.raw"));
            Assert.That(last.Accession, Is.EqualTo("Q01844"));
        }


        [Test]
        [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
        public void TestCruxResultsWriteResults(string path)
        {
            // load in original
            string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
            var original = new CruxResultFile(filePath);

            // write out original
            var outputPath = Path.Combine(directoryPath, "cruxResults.csv");
            original.WriteResults(outputPath);
            Assert.That(File.Exists(outputPath));

            // read in new original
            var written = new CruxResultFile(outputPath);
            Assert.That(written.Count(), Is.EqualTo(original.Count()));

            // check are equivalent
            for (int i = 0; i < original.Count(); i++)
            {
                var oldRecord = JsonConvert.SerializeObject(original.Results[i]);
                var newRecord = JsonConvert.SerializeObject(written.Results[i]);
                Assert.That(oldRecord, Is.EqualTo(newRecord));
            }

            // test writer still works without specifying extensions
            // NOTE(review): cruxResults.csv already exists from the write above, so the two
            // checks below are satisfied by that earlier file and cannot tell whether the
            // extension-less write produced anything. Consider a distinct base name here.
            var outputPathWithoutExtension = Path.Combine(directoryPath, "cruxResults");
            original.WriteResults(outputPathWithoutExtension);
            Assert.That(File.Exists(outputPathWithoutExtension + ".csv"));

            var writtenWithoutExtension = new CruxResultFile(outputPathWithoutExtension + ".csv");
            Assert.That(writtenWithoutExtension.Count(), Is.EqualTo(original.Count()));
        }
    }
}
2 changes: 1 addition & 1 deletion mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ internal class TestSupportedFileExtensions
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerProtein_FragPipev21.1individual_protein.tsv", SupportedFileType.MsFraggerProtein)]
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerPeptide_FragPipev21.1combined_peptide.tsv", SupportedFileType.MsFraggerPeptide)]
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerProtein_FragPipev21.1combined_protein.tsv", SupportedFileType.MsFraggerProtein)]

[TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", SupportedFileType.CruxResult)]
public static void TestSupportedFileTypeExtensions(string filePath, SupportedFileType expectedType)
{
var supportedType = filePath.ParseFileType();
Expand Down
3 changes: 3 additions & 0 deletions mzLib/Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,9 @@
<None Update="DataFiles\BinGenerationTest.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="FileReadingTests\ExternalFileTypes\crux.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="FileReadingTests\ExternalFileTypes\FraggerPeptide_FragPipev21.1combined_peptide.tsv">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
3 changes: 3 additions & 0 deletions mzLib/Test/TestMzLibUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,13 @@ public sealed class TestMzLibUtil
[TestCase("penguin", "penguin")]
[TestCase("penguin.jpg.gz", "penguin")]
[TestCase("penguin.jpg.zip", "penguin")]
[TestCase("penguin.jpg.mzXML", "penguin.jpg")]
public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAndOrPath, string expectedResult)
{
    // Exercise both entry points: the static helper and the string extension method.
    string viaStaticHelper = PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filenameAndOrPath);
    string viaExtensionMethod = filenameAndOrPath.GetPeriodTolerantFilenameWithoutExtension();

    // Each must produce the expected name for the given input.
    Assert.AreEqual(expectedResult, viaStaticHelper);
    Assert.AreEqual(expectedResult, viaExtensionMethod);
}
}
}
Loading