Skip to content

Commit

Permalink
Added support for crux output (smith-chem-wisc#774)
Browse files Browse the repository at this point in the history
* Added support for crux output

* cleaned up code and added one test

* reverted change to period tolerant file name without extension

---------

Co-authored-by: Edwin Laboy <63374885+elaboy@users.noreply.github.com>
Co-authored-by: trishorts <mshort@chem.wisc.edu>
  • Loading branch information
3 people committed Jul 31, 2024
1 parent 20db903 commit bbbe9f2
Show file tree
Hide file tree
Showing 10 changed files with 311 additions and 1 deletion.
10 changes: 10 additions & 0 deletions mzLib/MzLibUtil/ClassExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,15 @@ public static bool AllSame<T>(this IEnumerable<T> list)
return true;
}

/// <summary>
/// String extension that forwards to
/// <see cref="PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension"/>,
/// so the helper can be called fluently on a path or file name.
/// </summary>
/// <param name="filePath">File name or path handed unchanged to the static helper.</param>
/// <returns>Whatever the static helper returns for <paramref name="filePath"/>.</returns>
public static string GetPeriodTolerantFilenameWithoutExtension(this string filePath)
    => PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filePath);

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
using System.Globalization;
using System.Text;
using CsvHelper.Configuration;
using CsvHelper.Configuration.Attributes;
using MzLibUtil;

namespace Readers
{
/// <summary>
/// One record of a Crux result file. Each mapped property is bound to the column whose
/// header text is given in its <c>Name</c> attribute; members marked <c>Ignore</c> are
/// derived from other columns and are neither read from nor written to the file.
/// </summary>
public class CruxResult
{
    /// <summary>
    /// CsvHelper settings used for both reading and writing: tab-delimited, UTF-8,
    /// invariant culture, with a header record. A new instance is created on every access.
    /// </summary>
    public static CsvConfiguration CsvConfiguration => new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Encoding = Encoding.UTF8,
        HasHeaderRecord = true,
        Delimiter = "\t",
    };

    [Name("file")]
    public string FilePath { get; set; }

    [Name("scan")]
    public int OneBasedScanNumber { get; set; }

    [Name("charge")]
    public int Charge { get; set; }

    [Name("retention time")]
    public double RetentionTime { get; set; }

    [Name("spectrum precursor m/z")]
    public double PrecursorMz { get; set; }

    [Name("spectrum neutral mass")]
    public double NeutralMass { get; set; }

    [Name("peptide mass")]
    public double PeptideMass { get; set; }

    [Name("delta_cn")]
    public double DeltaCn { get; set; }

    [Name("xcorr score")]
    public double XCorrScore { get; set; }

    [Name("xcorr rank")]
    public int XCorrRank { get; set; }

    [Name("tailor score")]
    public double TailorScore { get; set; }

    [Name("tdc q-value")]
    public double TdcQValue { get; set; }

    [Name("b/y ions matched")]
    public int BAndYIonsMatched { get; set; }

    [Name("b/y ions total")]
    public int BAndYIonsTotal { get; set; }

    [Name("b/y ions fraction")]
    public double BAndYIonsFraction { get; set; }

    [Name("b/y ion repeat match")]
    public int BAndYIonRepeatMatch { get; set; }

    [Name("distinct matches/spectrum")]
    public int DistinctMatchesPerSpectrum { get; set; }

    [Name("sequence")]
    public string FullSequence { get; set; }

    [Name("unmodified sequence")]
    public string BaseSequence { get; set; }

    [Name("protein id")]
    public string ProteinId { get; set; }

    [Name("flanking aa")]
    public string FlankingAa { get; set; }

    #region Interpreted properties

    [Ignore] private string? _fileNameWithoutExtension = null;

    /// <summary>
    /// <see cref="FilePath"/> passed through GetPeriodTolerantFilenameWithoutExtension.
    /// Computed on first access and cached; later changes to <see cref="FilePath"/> are not reflected.
    /// </summary>
    [Ignore] public string FileNameWithoutExtension => _fileNameWithoutExtension ??= FilePath.GetPeriodTolerantFilenameWithoutExtension();

    [Ignore] private string? _accession = null;

    /// <summary>
    /// Accession taken from <see cref="ProteinId"/>: the second '|'-separated field, trimmed
    /// (e.g. "sp|P67809|YBOX1_HUMAN(205)" gives "P67809"). When the identifier contains no
    /// '|' separator the whole trimmed identifier is returned instead of throwing.
    /// Computed on first access and cached.
    /// </summary>
    [Ignore] public string Accession => _accession ??= ExtractAccession(ProteinId);

    /// <summary>
    /// Picks the accession field out of a protein identifier, falling back to the full
    /// identifier when there is no second '|'-separated field to take.
    /// </summary>
    private static string ExtractAccession(string proteinId)
    {
        string[] fields = proteinId.Split('|');

        // The original indexed [1] unconditionally, which throws IndexOutOfRangeException
        // for identifiers without a '|' (Split then yields a single element).
        string accessionField = fields.Length > 1 ? fields[1] : fields[0];
        return accessionField.Trim();
    }

    #endregion
}
}
35 changes: 35 additions & 0 deletions mzLib/Readers/ExternalResults/ResultFiles/CruxResultFile.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@

namespace Readers
{
/// <summary>
/// Result file wrapper for Crux output: loads every row into a <see cref="CruxResult"/>
/// and writes them back using <see cref="CruxResult.CsvConfiguration"/>.
/// </summary>
public class CruxResultFile : ResultFile<CruxResult>, IResultFile
{
    public override SupportedFileType FileType => SupportedFileType.CruxResult;
    public override Software Software { get; set; }

    /// <summary>Creates a result file bound to <paramref name="filePath"/> with software set to Crux.</summary>
    /// <param name="filePath">Path of the Crux result file.</param>
    public CruxResultFile(string filePath) : base(filePath, Software.Crux) { }

    /// <summary>Parameterless constructor; defers entirely to the base class.</summary>
    public CruxResultFile() : base() { }

    /// <summary>
    /// Reads all records from <c>FilePath</c> into <c>Results</c>.
    /// </summary>
    public override void LoadResults()
    {
        // Disposing the CsvReader also disposes the StreamReader it wraps.
        using var csv = new CsvHelper.CsvReader(new StreamReader(FilePath), CruxResult.CsvConfiguration);
        Results = csv.GetRecords<CruxResult>().ToList();
    }

    /// <summary>
    /// Writes the header and every record in <c>Results</c> to <paramref name="outputPath"/>,
    /// creating or overwriting the file.
    /// </summary>
    /// <param name="outputPath">
    /// Destination path. When <c>CanRead</c> rejects it, the extension reported by
    /// <c>FileType.GetFileExtension()</c> is appended before writing.
    /// </param>
    public override void WriteResults(string outputPath)
    {
        // The decision must be made on the destination path. The original tested FilePath
        // (the path this object was loaded from), so the extension was never appended for a
        // loaded file, and the check ran against an unset path for a file created with the
        // parameterless constructor.
        // NOTE(review): CanRead is defined on the base class (not visible here); presumably
        // it checks that the path ends with this file type's extension - confirm.
        if (!CanRead(outputPath))
        {
            outputPath += FileType.GetFileExtension();
        }

        // Disposing the CsvWriter flushes and disposes the underlying writer and file stream.
        using var csv = new CsvHelper.CsvWriter(new StreamWriter(File.Create(outputPath)), CruxResult.CsvConfiguration);

        csv.WriteHeader<CruxResult>();
        foreach (var result in Results)
        {
            // Terminate the previous line (header or record) before writing the next record.
            csv.NextRecord();
            csv.WriteRecord(result);
        }
    }
}
}
1 change: 1 addition & 0 deletions mzLib/Readers/Util/Software.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,6 @@ public enum Software
MaxQuant,
Toppic,
MsFragger, // files tested were from fragpipe v21.1
Crux
}
}
7 changes: 7 additions & 0 deletions mzLib/Readers/Util/SupportedFileTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ public enum SupportedFileType
MsFraggerPsm,
MsFraggerPeptide,
MsFraggerProtein,
CruxResult
}

public static class SupportedFileTypeExtensions
Expand Down Expand Up @@ -54,6 +55,7 @@ public static string GetFileExtension(this SupportedFileType type)
SupportedFileType.MsFraggerPsm => "psm.tsv",
SupportedFileType.MsFraggerPeptide => "peptide.tsv",
SupportedFileType.MsFraggerProtein => "protein.tsv",
SupportedFileType.CruxResult => ".txt",
_ => throw new MzLibException("File type not supported")
};
}
Expand Down Expand Up @@ -111,6 +113,11 @@ public static SupportedFileType ParseFileType(this string filePath)
throw new MzLibException("Tsv file type not supported");
}

case ".txt":
if (filePath.EndsWith(SupportedFileType.CruxResult.GetFileExtension(), StringComparison.InvariantCultureIgnoreCase))
return SupportedFileType.CruxResult;
throw new MzLibException("Txt file type not supported");

default:
throw new MzLibException("File type not supported");
}
Expand Down
15 changes: 15 additions & 0 deletions mzLib/Test/FileReadingTests/ExternalFileTypes/crux.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
file scan charge retention time spectrum precursor m/z spectrum neutral mass peptide mass delta_cn xcorr score xcorr rank tailor score tdc q-value b/y ions matched b/y ions total b/y ions fraction b/y ion repeat match distinct matches/spectrum sequence unmodified sequence protein id flanking aa
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 14674 3 2747.6599 1075.1815 3222.5227 3222.5222 0.84335566 6.4364114 1 1.9659604 3.8850189e-06 51 116 0.43965518 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 15417 3 2814.6499 1075.182 3222.5242 3222.5222 0.86036599 6.3186069 1 1.9550625 3.8850189e-06 48 116 0.41379309 3 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_18_161103_B4_HCD_OT_4ul.raw.mzXML 6847 4 2012.87 918.185 3668.7109 3668.7124 0.83817238 6.7191076 1 1.9478002 3.8850189e-06 53 191 0.27748692 0 64 AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK sp|Q9UKY7|CDV3_HUMAN(28) RA
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 74906 3 8146.6001 1004.5292 3010.5659 3010.5623 0.86094695 6.1447253 1 1.9289217 3.8850189e-06 39 116 0.33620691 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL
/hdd/data/PXD005590/B02_22_161103_D1_HCD_OT_4ul.raw.mzXML 65300 3 7277.5698 867.7704 2600.2896 2600.2869 0.86649311 6.2026334 1 1.9265088 3.8850189e-06 37 96 0.38541666 0 160 NHDTGVSPVFAGGVEYAITPEIATR NHDTGVSPVFAGGVEYAITPEIATR sp|P0A910|OMPA_ECOLI(135) KL
/hdd/data/PXD005590/B02_11_161103_D2_HCD_OT_4ul.raw.mzXML 32062 4 4347.98 668.6035 2670.3848 2670.3875 0.82502377 6.3233223 1 1.9088538 3.8850189e-06 41 95 0.43157896 0 102 EEHEVAVLGAPHNPAPPTSTVIHIR EEHEVAVLGAPHNPAPPTSTVIHIR sp|Q01628|IFM3_HUMAN(25) KS
/hdd/data/PXD005590/B02_16_161103_A3_HCD_OT_4ul.raw.mzXML 51309 4 6030.9102 884.4734 3533.8647 3533.8586 0.86957496 6.0390177 1 1.9073439 3.8850189e-06 35 139 0.25179857 0 26 AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR sp|P46937|YAP1_HUMAN(125) RQ
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 59327 4 6765.9102 771.1469 3080.5586 3080.561 0.81879568 6.7819619 1 1.9058784 3.8850189e-06 43 127 0.33858269 0 82 GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR sp|Q9BTU6|P4K2A_HUMAN(67) RE
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 4435 3 1815.64 751.6635 2251.9685 2251.9666 0.84570491 6.0909443 1 1.9016477 3.8850189e-06 38 92 0.41304347 0 112 APKPDGPGGGPGGSHMGGNYGDDR APKPDGPGGGPGGSHMGGNYGDDR sp|P35637|FUS_HUMAN(449) KR
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 19161 3 3172.6001 827.3565 2479.0476 2479.0457 0.86390048 5.704639 1 1.8994089 3.8850189e-06 38 92 0.41304347 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS
/hdd/data/PXD005590/B02_24_161103_C1_HCD_OT_4ul.raw.mzXML 58893 2 6700.6001 1396.1667 2790.3188 2790.3218 0.85683089 5.7747865 1 1.897424 3.8850189e-06 31 52 0.59615386 3 134 HTGPGILSMANAGPNTNGSQFFICTAK HTGPGILSMANAGPNTNGSQFFICTAK sp|P62937|PPIA_HUMAN(92) KT
/hdd/data/PXD005590/B02_001_161103_B1_HCD_OT_4ul.raw.mzXML 17264 3 2921.5601 1075.1821 3222.5247 3222.5222 0.81186515 5.8244729 1 1.8960458 3.8850189e-06 49 116 0.4224138 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ
/hdd/data/PXD005590/B02_19_161103_C4_HCD_OT_4ul.raw.mzXML 72508 3 7969.2202 1004.5298 3010.5674 3010.5623 0.85636955 5.7181306 1 1.894421 3.8850189e-06 36 116 0.31034482 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 19752 3 3220.75 827.3577 2479.0515 2479.0457 0.85362118 5.682076 1 1.8917845 3.8850189e-06 32 92 0.34782609 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS
145 changes: 145 additions & 0 deletions mzLib/Test/FileReadingTests/TestCruxReader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
using NUnit.Framework;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
using Newtonsoft.Json;
using Readers;

namespace Test.FileReadingTests
{
[TestFixture]
[ExcludeFromCodeCoverage]
internal class TestCruxReader
{
    // Scratch directory for files written by the tests; created once and removed after the fixture runs.
    private static string directoryPath;

    [OneTimeSetUp]
    public void SetUp()
    {
        directoryPath = Path.Combine(TestContext.CurrentContext.TestDirectory,
            @"FileReadingTests\ReadingWritingTests");
        Directory.CreateDirectory(directoryPath);
    }

    [OneTimeTearDown]
    public void TearDown()
    {
        Directory.Delete(directoryPath, true);
    }

    /// <summary>Loading through the constructor yields the expected number of records.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
    public void TestCruxResultsLoadsAndCountCorrect(string path, int recordCount)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        CruxResultFile file = new CruxResultFile(filePath);
        Assert.That(file.Count(), Is.EqualTo(recordCount));
        Assert.That(file.CanRead(path));
    }

    /// <summary>The generic reader and the direct constructor produce equivalent files.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
    public static void TestCruxResultsFromGenericReader(string path, int recordCount)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var constructedFile = new CruxResultFile(filePath);
        var genericFile = FileReader.ReadFile<CruxResultFile>(filePath);

        Assert.That(genericFile.Count(), Is.EqualTo(recordCount));
        Assert.That(genericFile.Count(), Is.EqualTo(constructedFile.Count()));
        Assert.That(genericFile.FilePath, Is.EqualTo(constructedFile.FilePath));
    }

    /// <summary>Every column of the first and last rows of the fixture is parsed to the expected value.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
    public void TestCruxResultsFirstAndLastAreCorrect(string path)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var file = new CruxResultFile(filePath);

        var first = file.First();
        var last = file.Last();

        Assert.That(first.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML"));
        Assert.That(first.OneBasedScanNumber, Is.EqualTo(14674));
        Assert.That(first.Charge, Is.EqualTo(3));
        Assert.That(first.RetentionTime, Is.EqualTo(2747.6599));
        Assert.That(first.PrecursorMz, Is.EqualTo(1075.1815));
        Assert.That(first.NeutralMass, Is.EqualTo(3222.5227));
        Assert.That(first.PeptideMass, Is.EqualTo(3222.5222));
        Assert.That(first.DeltaCn, Is.EqualTo(0.84335566));
        Assert.That(first.XCorrScore, Is.EqualTo(6.4364114));
        Assert.That(first.XCorrRank, Is.EqualTo(1));
        Assert.That(first.TailorScore, Is.EqualTo(1.9659604));
        // The value is ~3.9E-6, so the tolerance has to be far smaller than that to be meaningful.
        Assert.That(first.TdcQValue, Is.EqualTo(3.8850189e-06).Within(1E-12));
        Assert.That(first.BAndYIonsMatched, Is.EqualTo(51));
        Assert.That(first.BAndYIonsTotal, Is.EqualTo(116));
        Assert.That(first.BAndYIonsFraction, Is.EqualTo(0.43965518));
        Assert.That(first.BAndYIonRepeatMatch, Is.EqualTo(0));
        Assert.That(first.DistinctMatchesPerSpectrum, Is.EqualTo(68));
        Assert.That(first.BaseSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
        Assert.That(first.FullSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
        Assert.That(first.ProteinId, Is.EqualTo("sp|P67809|YBOX1_HUMAN(205)"));
        Assert.That(first.FlankingAa, Is.EqualTo("RQ"));
        Assert.That(first.FileNameWithoutExtension, Is.EqualTo("B02_21_161103_D4_HCD_OT_4ul.raw"));
        Assert.That(first.Accession, Is.EqualTo("P67809"));

        Assert.That(last.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML"));
        Assert.That(last.OneBasedScanNumber, Is.EqualTo(19752));
        Assert.That(last.Charge, Is.EqualTo(3));
        Assert.That(last.RetentionTime, Is.EqualTo(3220.75));
        Assert.That(last.PrecursorMz, Is.EqualTo(827.3577));
        Assert.That(last.NeutralMass, Is.EqualTo(2479.0515));
        Assert.That(last.PeptideMass, Is.EqualTo(2479.0457));
        Assert.That(last.DeltaCn, Is.EqualTo(0.85362118));
        Assert.That(last.XCorrScore, Is.EqualTo(5.682076));
        Assert.That(last.XCorrRank, Is.EqualTo(1));
        Assert.That(last.TailorScore, Is.EqualTo(1.8917845));
        Assert.That(last.TdcQValue, Is.EqualTo(3.8850189e-06).Within(1E-12));
        Assert.That(last.BAndYIonsMatched, Is.EqualTo(32));
        Assert.That(last.BAndYIonsTotal, Is.EqualTo(92));
        Assert.That(last.BAndYIonsFraction, Is.EqualTo(0.34782609));
        Assert.That(last.BAndYIonRepeatMatch, Is.EqualTo(0));
        Assert.That(last.DistinctMatchesPerSpectrum, Is.EqualTo(52));
        Assert.That(last.BaseSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
        Assert.That(last.FullSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
        Assert.That(last.ProteinId, Is.EqualTo("sp|Q01844|EWS_HUMAN(269)"));
        Assert.That(last.FlankingAa, Is.EqualTo("RS"));
        Assert.That(last.FileNameWithoutExtension, Is.EqualTo("B02_20_161103_E4_HCD_OT_4ul.raw"));
        Assert.That(last.Accession, Is.EqualTo("Q01844"));
    }

    /// <summary>Records survive a write/read round trip, with and without an explicit extension.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
    public void TestCruxResultsWriteResults(string path)
    {
        // Extension reported for SupportedFileType.CruxResult. The previous ".csv" is not a
        // Crux extension, so the "without extension" check below could never have matched
        // a file the writer named itself.
        const string cruxExtension = ".txt";

        // load in original
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var original = new CruxResultFile(filePath);

        // write out original
        var outputPath = Path.Combine(directoryPath, "cruxResults" + cruxExtension);
        original.WriteResults(outputPath);
        Assert.That(File.Exists(outputPath));

        // read in new original
        var written = new CruxResultFile(outputPath);
        Assert.That(written.Count(), Is.EqualTo(original.Count()));

        // check are equivalent
        for (int i = 0; i < original.Count(); i++)
        {
            var oldRecord = JsonConvert.SerializeObject(original.Results[i]);
            var newRecord = JsonConvert.SerializeObject(written.Results[i]);
            Assert.That(oldRecord, Is.EqualTo(newRecord));
        }

        // test writer still works without specifying extensions
        var outputPathWithoutExtension = Path.Combine(directoryPath, "cruxResults");
        original.WriteResults(outputPathWithoutExtension);
        Assert.That(File.Exists(outputPathWithoutExtension + cruxExtension));

        var writtenWithoutExtension = new CruxResultFile(outputPathWithoutExtension + cruxExtension);
        Assert.That(writtenWithoutExtension.Count(), Is.EqualTo(original.Count()));
    }
}
}
2 changes: 1 addition & 1 deletion mzLib/Test/FileReadingTests/TestSupportedFileExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ internal class TestSupportedFileExtensions
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerProtein_FragPipev21.1individual_protein.tsv", SupportedFileType.MsFraggerProtein)]
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerPeptide_FragPipev21.1combined_peptide.tsv", SupportedFileType.MsFraggerPeptide)]
[TestCase(@"FileReadingTests\ExternalFileTypes\FraggerProtein_FragPipev21.1combined_protein.tsv", SupportedFileType.MsFraggerProtein)]

[TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", SupportedFileType.CruxResult)]
public static void TestSupportedFileTypeExtensions(string filePath, SupportedFileType expectedType)
{
var supportedType = filePath.ParseFileType();
Expand Down
3 changes: 3 additions & 0 deletions mzLib/Test/Test.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,9 @@
<None Update="DataFiles\BinGenerationTest.mzML">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="FileReadingTests\ExternalFileTypes\crux.txt">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="FileReadingTests\ExternalFileTypes\FraggerPeptide_FragPipev21.1combined_peptide.tsv">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
Expand Down
3 changes: 3 additions & 0 deletions mzLib/Test/TestMzLibUtil.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ public sealed class TestMzLibUtil
[TestCase("penguin", "penguin")]
[TestCase("penguin.jpg.gz", "penguin")]
[TestCase("penguin.jpg.zip", "penguin")]
[TestCase("penguin.jpg.mzXML", "penguin.jpg")]
public static void TestPeriodTolerantFilenameWithoutExtension(string filenameAndOrPath, string expectedResult)
{
    // Call the static helper directly, then the string extension method; both must agree with the expectation.
    string fromStaticHelper = PeriodTolerantFilenameWithoutExtension.GetPeriodTolerantFilenameWithoutExtension(filenameAndOrPath);
    string fromExtensionMethod = filenameAndOrPath.GetPeriodTolerantFilenameWithoutExtension();

    Assert.AreEqual(expectedResult, fromStaticHelper);
    Assert.AreEqual(expectedResult, fromExtensionMethod);
}
}
}

0 comments on commit bbbe9f2

Please sign in to comment.