forked from smith-chem-wisc/mzLib
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for crux output (smith-chem-wisc#774)
* Added support for crux output * cleaned up code and added one test * reverted change to period tolerant file name without extension --------- Co-authored-by: Edwin Laboy <63374885+elaboy@users.noreply.github.com> Co-authored-by: trishorts <mshort@chem.wisc.edu>
- Loading branch information
1 parent
20db903
commit bbbe9f2
Showing
10 changed files
with
311 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
91
mzLib/Readers/ExternalResults/IndividualResultRecords/CruxResult.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
using System.Globalization; | ||
using System.Text; | ||
using CsvHelper.Configuration; | ||
using CsvHelper.Configuration.Attributes; | ||
using MzLibUtil; | ||
|
||
namespace Readers | ||
{ | ||
/// <summary>
/// A single row of a Crux tab-delimited results file. Each mapped property is bound to its
/// column by the header text given in its CsvHelper <c>[Name]</c> attribute.
/// </summary>
public class CruxResult
{
    /// <summary>
    /// CsvHelper settings used for both reading and writing Crux results:
    /// invariant culture, UTF-8, tab-delimited, with a header record.
    /// A new configuration instance is created on every access.
    /// </summary>
    public static CsvConfiguration CsvConfiguration => new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Encoding = Encoding.UTF8,
        HasHeaderRecord = true,
        Delimiter = "\t",
    };

    /// <summary>Value of the "file" column.</summary>
    [Name("file")]
    public string FilePath { get; set; }

    /// <summary>Value of the "scan" column.</summary>
    [Name("scan")]
    public int OneBasedScanNumber { get; set; }

    /// <summary>Value of the "charge" column.</summary>
    [Name("charge")]
    public int Charge { get; set; }

    /// <summary>Value of the "retention time" column.</summary>
    [Name("retention time")]
    public double RetentionTime { get; set; }

    /// <summary>Value of the "spectrum precursor m/z" column.</summary>
    [Name("spectrum precursor m/z")]
    public double PrecursorMz { get; set; }

    /// <summary>Value of the "spectrum neutral mass" column.</summary>
    [Name("spectrum neutral mass")]
    public double NeutralMass { get; set; }

    /// <summary>Value of the "peptide mass" column.</summary>
    [Name("peptide mass")]
    public double PeptideMass { get; set; }

    /// <summary>Value of the "delta_cn" column.</summary>
    [Name("delta_cn")]
    public double DeltaCn { get; set; }

    /// <summary>Value of the "xcorr score" column.</summary>
    [Name("xcorr score")]
    public double XCorrScore { get; set; }

    /// <summary>Value of the "xcorr rank" column.</summary>
    [Name("xcorr rank")]
    public int XCorrRank { get; set; }

    /// <summary>Value of the "tailor score" column.</summary>
    [Name("tailor score")]
    public double TailorScore { get; set; }

    /// <summary>Value of the "tdc q-value" column.</summary>
    [Name("tdc q-value")]
    public double TdcQValue { get; set; }

    /// <summary>Value of the "b/y ions matched" column.</summary>
    [Name("b/y ions matched")]
    public int BAndYIonsMatched { get; set; }

    /// <summary>Value of the "b/y ions total" column.</summary>
    [Name("b/y ions total")]
    public int BAndYIonsTotal { get; set; }

    /// <summary>Value of the "b/y ions fraction" column.</summary>
    [Name("b/y ions fraction")]
    public double BAndYIonsFraction { get; set; }

    /// <summary>Value of the "b/y ion repeat match" column.</summary>
    [Name("b/y ion repeat match")]
    public int BAndYIonRepeatMatch { get; set; }

    /// <summary>Value of the "distinct matches/spectrum" column.</summary>
    [Name("distinct matches/spectrum")]
    public int DistinctMatchesPerSpectrum { get; set; }

    /// <summary>Value of the "sequence" column.</summary>
    [Name("sequence")]
    public string FullSequence { get; set; }

    /// <summary>Value of the "unmodified sequence" column.</summary>
    [Name("unmodified sequence")]
    public string BaseSequence { get; set; }

    /// <summary>Value of the "protein id" column, e.g. <c>sp|P67809|YBOX1_HUMAN(205)</c>.</summary>
    [Name("protein id")]
    public string ProteinId { get; set; }

    /// <summary>Value of the "flanking aa" column.</summary>
    [Name("flanking aa")]
    public string FlankingAa { get; set; }

    #region Interpreted properties

    [Ignore] private string? _fileNameWithoutExtension = null;

    /// <summary>
    /// <see cref="FilePath"/> passed through <c>GetPeriodTolerantFilenameWithoutExtension</c>.
    /// Computed on first access and cached; excluded from CSV mapping.
    /// </summary>
    [Ignore]
    public string FileNameWithoutExtension =>
        _fileNameWithoutExtension ??= FilePath.GetPeriodTolerantFilenameWithoutExtension();

    [Ignore] private string? _accession = null;

    /// <summary>
    /// Accession parsed from <see cref="ProteinId"/>: the second '|'-separated field, trimmed.
    /// When the id contains no '|', the whole trimmed id is returned instead of throwing;
    /// a null or empty id yields an empty string.
    /// Computed on first access and cached; excluded from CSV mapping.
    /// </summary>
    [Ignore]
    public string Accession => _accession ??= ExtractAccession(ProteinId);

    /// <summary>
    /// Pulls the accession out of a pipe-delimited protein identifier without indexing past
    /// the end of the split result.
    /// </summary>
    private static string ExtractAccession(string proteinId)
    {
        if (string.IsNullOrEmpty(proteinId))
        {
            return string.Empty;
        }

        string[] fields = proteinId.Split('|');

        // Ids of the form db|accession|name carry the accession in field 1;
        // anything without a pipe is used verbatim.
        string accession = fields.Length > 1 ? fields[1] : fields[0];
        return accession.Trim();
    }

    #endregion
}
} |
35 changes: 35 additions & 0 deletions
35
mzLib/Readers/ExternalResults/ResultFiles/CruxResultFile.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
| ||
namespace Readers | ||
{ | ||
/// <summary>
/// Result file wrapper for Crux output: loads rows into <see cref="CruxResult"/> records and
/// writes them back out using <see cref="CruxResult.CsvConfiguration"/>.
/// </summary>
public class CruxResultFile : ResultFile<CruxResult>, IResultFile
{
    /// <summary>File type tag for this reader; always <c>SupportedFileType.CruxResult</c>.</summary>
    public override SupportedFileType FileType
    {
        get { return SupportedFileType.CruxResult; }
    }

    /// <summary>Software that produced the file.</summary>
    public override Software Software { get; set; }

    /// <summary>Creates a result file bound to <paramref name="filePath"/> and tagged as Crux output.</summary>
    /// <param name="filePath">Path of the Crux results file.</param>
    public CruxResultFile(string filePath) : base(filePath, Software.Crux) { }

    /// <summary>Parameterless constructor; defers entirely to the base class.</summary>
    public CruxResultFile() : base() { }

    /// <summary>
    /// Reads every record from <c>FilePath</c> and stores the materialized list in <c>Results</c>.
    /// </summary>
    public override void LoadResults()
    {
        using (var reader = new CsvHelper.CsvReader(new StreamReader(FilePath), CruxResult.CsvConfiguration))
        {
            var records = reader.GetRecords<CruxResult>();
            Results = records.ToList();
        }
    }

    /// <summary>
    /// Writes the header followed by every entry of <c>Results</c> to <paramref name="outputPath"/>.
    /// </summary>
    /// <param name="outputPath">Destination path; the file type's extension may be appended (see note below).</param>
    public override void WriteResults(string outputPath)
    {
        // NOTE(review): the extension decision is based on FilePath (this instance's own path),
        // not on outputPath — confirm that is the intended check.
        bool sourceIsReadable = CanRead(FilePath);
        if (!sourceIsReadable)
        {
            outputPath += FileType.GetFileExtension();
        }

        using var writer = new CsvHelper.CsvWriter(new StreamWriter(File.Create(outputPath)), CruxResult.CsvConfiguration);

        writer.WriteHeader<CruxResult>();
        foreach (var record in Results)
        {
            // Advance to a new line before each record so the output has no trailing empty row.
            writer.NextRecord();
            writer.WriteRecord(record);
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,5 +11,6 @@ public enum Software | |
MaxQuant, | ||
Toppic, | ||
MsFragger, // files tested were from fragpipe v21.1 | ||
Crux | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
file scan charge retention time spectrum precursor m/z spectrum neutral mass peptide mass delta_cn xcorr score xcorr rank tailor score tdc q-value b/y ions matched b/y ions total b/y ions fraction b/y ion repeat match distinct matches/spectrum sequence unmodified sequence protein id flanking aa | ||
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 14674 3 2747.6599 1075.1815 3222.5227 3222.5222 0.84335566 6.4364114 1 1.9659604 3.8850189e-06 51 116 0.43965518 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ | ||
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 15417 3 2814.6499 1075.182 3222.5242 3222.5222 0.86036599 6.3186069 1 1.9550625 3.8850189e-06 48 116 0.41379309 3 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ | ||
/hdd/data/PXD005590/B02_18_161103_B4_HCD_OT_4ul.raw.mzXML 6847 4 2012.87 918.185 3668.7109 3668.7124 0.83817238 6.7191076 1 1.9478002 3.8850189e-06 53 191 0.27748692 0 64 AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK AASAAGAAGSAGGSSGAAGAAGGGAGAGTRPGDGGTASAGAAGPGAATK sp|Q9UKY7|CDV3_HUMAN(28) RA | ||
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 74906 3 8146.6001 1004.5292 3010.5659 3010.5623 0.86094695 6.1447253 1 1.9289217 3.8850189e-06 39 116 0.33620691 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL | ||
/hdd/data/PXD005590/B02_22_161103_D1_HCD_OT_4ul.raw.mzXML 65300 3 7277.5698 867.7704 2600.2896 2600.2869 0.86649311 6.2026334 1 1.9265088 3.8850189e-06 37 96 0.38541666 0 160 NHDTGVSPVFAGGVEYAITPEIATR NHDTGVSPVFAGGVEYAITPEIATR sp|P0A910|OMPA_ECOLI(135) KL | ||
/hdd/data/PXD005590/B02_11_161103_D2_HCD_OT_4ul.raw.mzXML 32062 4 4347.98 668.6035 2670.3848 2670.3875 0.82502377 6.3233223 1 1.9088538 3.8850189e-06 41 95 0.43157896 0 102 EEHEVAVLGAPHNPAPPTSTVIHIR EEHEVAVLGAPHNPAPPTSTVIHIR sp|Q01628|IFM3_HUMAN(25) KS | ||
/hdd/data/PXD005590/B02_16_161103_A3_HCD_OT_4ul.raw.mzXML 51309 4 6030.9102 884.4734 3533.8647 3533.8586 0.86957496 6.0390177 1 1.9073439 3.8850189e-06 35 139 0.25179857 0 26 AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR AHSSPASLQLGAVSPGTLTPTGVVSGPAATPTAQHLR sp|P46937|YAP1_HUMAN(125) RQ | ||
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 59327 4 6765.9102 771.1469 3080.5586 3080.561 0.81879568 6.7819619 1 1.9058784 3.8850189e-06 43 127 0.33858269 0 82 GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR GAAAQGQTQTVAAQAQALAAQAAAAAHAAQAHR sp|Q9BTU6|P4K2A_HUMAN(67) RE | ||
/hdd/data/PXD005590/B02_06_161103_A1_HCD_OT_4ul.raw.mzXML 4435 3 1815.64 751.6635 2251.9685 2251.9666 0.84570491 6.0909443 1 1.9016477 3.8850189e-06 38 92 0.41304347 0 112 APKPDGPGGGPGGSHMGGNYGDDR APKPDGPGGGPGGSHMGGNYGDDR sp|P35637|FUS_HUMAN(449) KR | ||
/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML 19161 3 3172.6001 827.3565 2479.0476 2479.0457 0.86390048 5.704639 1 1.8994089 3.8850189e-06 38 92 0.41304347 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS | ||
/hdd/data/PXD005590/B02_24_161103_C1_HCD_OT_4ul.raw.mzXML 58893 2 6700.6001 1396.1667 2790.3188 2790.3218 0.85683089 5.7747865 1 1.897424 3.8850189e-06 31 52 0.59615386 3 134 HTGPGILSMANAGPNTNGSQFFICTAK HTGPGILSMANAGPNTNGSQFFICTAK sp|P62937|PPIA_HUMAN(92) KT | ||
/hdd/data/PXD005590/B02_001_161103_B1_HCD_OT_4ul.raw.mzXML 17264 3 2921.5601 1075.1821 3222.5247 3222.5222 0.81186515 5.8244729 1 1.8960458 3.8850189e-06 49 116 0.4224138 0 68 RPQYSNPPVQGEVMEGADNQGAGEQGRPVR RPQYSNPPVQGEVMEGADNQGAGEQGRPVR sp|P67809|YBOX1_HUMAN(205) RQ | ||
/hdd/data/PXD005590/B02_19_161103_C4_HCD_OT_4ul.raw.mzXML 72508 3 7969.2202 1004.5298 3010.5674 3010.5623 0.85636955 5.7181306 1 1.894421 3.8850189e-06 36 116 0.31034482 0 122 HIADLAGNSEVILPVPAFNVINGGSHAGNK HIADLAGNSEVILPVPAFNVINGGSHAGNK sp|P06733|ENOA_HUMAN(133) RL | ||
/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML 19752 3 3220.75 827.3577 2479.0515 2479.0457 0.85362118 5.682076 1 1.8917845 3.8850189e-06 32 92 0.34782609 0 52 QDHPSSMGVYGQESGGFSGPGENR QDHPSSMGVYGQESGGFSGPGENR sp|Q01844|EWS_HUMAN(269) RS |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,145 @@ | ||
using NUnit.Framework; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.IO; | ||
using System.Linq; | ||
using Newtonsoft.Json; | ||
using Readers; | ||
|
||
namespace Test.FileReadingTests | ||
{ | ||
[TestFixture] | ||
[ExcludeFromCodeCoverage] | ||
/// <summary>
/// Reading and writing tests for <see cref="CruxResultFile"/> against the bundled
/// <c>crux.txt</c> fixture (14 records).
/// </summary>
internal class TestCruxReader
{
    // Scratch directory for files produced by the write test; created once and removed afterwards.
    private static string directoryPath;

    [OneTimeSetUp]
    public void SetUp()
    {
        directoryPath = Path.Combine(TestContext.CurrentContext.TestDirectory,
            @"FileReadingTests\ReadingWritingTests");
        Directory.CreateDirectory(directoryPath);
    }

    [OneTimeTearDown]
    public void TearDown()
    {
        Directory.Delete(directoryPath, true);
    }

    /// <summary>Constructing from a path loads the expected number of records and the path is readable.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
    public void TestCruxResultsLoadsAndCountCorrect(string path, int recordCount)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        CruxResultFile file = new CruxResultFile(filePath);
        Assert.That(file.Count(), Is.EqualTo(recordCount));
        Assert.That(file.CanRead(path));
    }

    /// <summary>The generic <c>FileReader.ReadFile</c> entry point agrees with direct construction.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt", 14)]
    public static void TestCruxResultsFromGenericReader(string path, int recordCount)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var constructedFile = new CruxResultFile(filePath);
        var genericFile = FileReader.ReadFile<CruxResultFile>(filePath);

        Assert.That(genericFile.Count(), Is.EqualTo(recordCount));
        Assert.That(genericFile.Count(), Is.EqualTo(constructedFile.Count()));
        Assert.That(genericFile.FilePath, Is.EqualTo(constructedFile.FilePath));
    }

    /// <summary>Every mapped column of the first and last fixture rows is parsed to the expected value.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
    public void TestCruxResultsFirstAndLastAreCorrect(string path)
    {
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var file = new CruxResultFile(filePath);

        var first = file.First();
        var last = file.Last();

        Assert.That(first.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_21_161103_D4_HCD_OT_4ul.raw.mzXML"));
        Assert.That(first.OneBasedScanNumber, Is.EqualTo(14674));
        Assert.That(first.Charge, Is.EqualTo(3));
        Assert.That(first.RetentionTime, Is.EqualTo(2747.6599));
        Assert.That(first.PrecursorMz, Is.EqualTo(1075.1815));
        Assert.That(first.NeutralMass, Is.EqualTo(3222.5227));
        Assert.That(first.PeptideMass, Is.EqualTo(3222.5222));
        Assert.That(first.DeltaCn, Is.EqualTo(0.84335566));
        Assert.That(first.XCorrScore, Is.EqualTo(6.4364114));
        Assert.That(first.XCorrRank, Is.EqualTo(1));
        Assert.That(first.TailorScore, Is.EqualTo(1.9659604));
        // The q-value is ~3.9e-6, so the tolerance must be far smaller than the value to be meaningful.
        Assert.That(first.TdcQValue, Is.EqualTo(3.8850189e-06).Within(1E-12));
        Assert.That(first.BAndYIonsMatched, Is.EqualTo(51));
        Assert.That(first.BAndYIonsTotal, Is.EqualTo(116));
        Assert.That(first.BAndYIonsFraction, Is.EqualTo(0.43965518));
        Assert.That(first.BAndYIonRepeatMatch, Is.EqualTo(0));
        Assert.That(first.DistinctMatchesPerSpectrum, Is.EqualTo(68));
        Assert.That(first.BaseSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
        Assert.That(first.FullSequence, Is.EqualTo("RPQYSNPPVQGEVMEGADNQGAGEQGRPVR"));
        Assert.That(first.ProteinId, Is.EqualTo("sp|P67809|YBOX1_HUMAN(205)"));
        Assert.That(first.FlankingAa, Is.EqualTo("RQ"));
        Assert.That(first.FileNameWithoutExtension, Is.EqualTo("B02_21_161103_D4_HCD_OT_4ul.raw"));
        Assert.That(first.Accession, Is.EqualTo("P67809"));

        Assert.That(last.FilePath, Is.EqualTo(@"/hdd/data/PXD005590/B02_20_161103_E4_HCD_OT_4ul.raw.mzXML"));
        Assert.That(last.OneBasedScanNumber, Is.EqualTo(19752));
        Assert.That(last.Charge, Is.EqualTo(3));
        Assert.That(last.RetentionTime, Is.EqualTo(3220.75));
        Assert.That(last.PrecursorMz, Is.EqualTo(827.3577));
        Assert.That(last.NeutralMass, Is.EqualTo(2479.0515));
        Assert.That(last.PeptideMass, Is.EqualTo(2479.0457));
        Assert.That(last.DeltaCn, Is.EqualTo(0.85362118));
        Assert.That(last.XCorrScore, Is.EqualTo(5.682076));
        Assert.That(last.XCorrRank, Is.EqualTo(1));
        Assert.That(last.TailorScore, Is.EqualTo(1.8917845));
        Assert.That(last.TdcQValue, Is.EqualTo(3.8850189e-06).Within(1E-12));
        Assert.That(last.BAndYIonsMatched, Is.EqualTo(32));
        Assert.That(last.BAndYIonsTotal, Is.EqualTo(92));
        Assert.That(last.BAndYIonsFraction, Is.EqualTo(0.34782609));
        Assert.That(last.BAndYIonRepeatMatch, Is.EqualTo(0));
        Assert.That(last.DistinctMatchesPerSpectrum, Is.EqualTo(52));
        Assert.That(last.BaseSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
        Assert.That(last.FullSequence, Is.EqualTo("QDHPSSMGVYGQESGGFSGPGENR"));
        Assert.That(last.ProteinId, Is.EqualTo("sp|Q01844|EWS_HUMAN(269)"));
        Assert.That(last.FlankingAa, Is.EqualTo("RS"));
        Assert.That(last.FileNameWithoutExtension, Is.EqualTo("B02_20_161103_E4_HCD_OT_4ul.raw"));
        Assert.That(last.Accession, Is.EqualTo("Q01844"));
    }

    /// <summary>Records written by <c>WriteResults</c> read back identical to the originals.</summary>
    [Test]
    [TestCase(@"FileReadingTests\ExternalFileTypes\crux.txt")]
    public void TestCruxResultsWriteResults(string path)
    {
        // load in original
        string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, path);
        var original = new CruxResultFile(filePath);

        // write out original
        var outputPath = Path.Combine(directoryPath, "cruxResults.csv");
        original.WriteResults(outputPath);
        Assert.That(File.Exists(outputPath));

        // read in new original
        var written = new CruxResultFile(outputPath);
        Assert.That(written.Count(), Is.EqualTo(original.Count()));

        // check are equivalent (compared through their JSON serialization)
        for (int i = 0; i < original.Count(); i++)
        {
            var oldRecord = JsonConvert.SerializeObject(original.Results[i]);
            var newRecord = JsonConvert.SerializeObject(written.Results[i]);
            Assert.That(oldRecord, Is.EqualTo(newRecord));
        }

        // test writer still works without specifying extensions
        // NOTE(review): cruxResults.csv already exists from the first write above, so the
        // existence check below passes regardless of what this second call writes — consider
        // a distinct base name once the intended extension behavior is confirmed.
        var outputPathWithoutExtension = Path.Combine(directoryPath, "cruxResults");
        original.WriteResults(outputPathWithoutExtension);
        Assert.That(File.Exists(outputPathWithoutExtension + ".csv"));

        var writtenWithoutExtension = new CruxResultFile(outputPathWithoutExtension + ".csv");
        Assert.That(writtenWithoutExtension.Count(), Is.EqualTo(original.Count()));
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters