Skip to content

Commit

Permalink
tests: update test file names
Browse files Browse the repository at this point in the history
  • Loading branch information
ibludau committed Jan 22, 2024
1 parent 273edf3 commit 6a10f2a
Showing 1 changed file with 275 additions and 0 deletions.
275 changes: 275 additions & 0 deletions tests/test_importing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,275 @@
#!python -m unittest tests.test_utils
"""This module provides unit tests for alphamap.cli."""

# builtin
import unittest

import os
import pandas as pd

# local
from alphamap.importing import read_file, \
extract_rawfile_unique_values, \
import_spectronaut_data, \
import_maxquant_data, \
convert_ap_mq_mod, \
import_alphapept_data, \
convert_diann_mq_mod, \
import_diann_data, \
convert_fragpipe_mq_mod, \
import_fragpipe_data, \
import_data

THIS_FOLDER = os.path.dirname(__file__)
TEST_FOLDER = os.path.join(
f"{os.path.dirname(THIS_FOLDER)}",
"nbs",
"testdata",
)

class TestImporting(unittest.TestCase):

def test_read_file(self,):
file_with_missing_col = os.path.join(TEST_FOLDER,'test_not_all_columns_spectronaut.csv')
spectronaut_columns = ["PEP.AllOccurringProteinAccessions","EG.ModifiedSequence","R.FileName"]

try:
out = read_file(file_with_missing_col, spectronaut_columns)
except ValueError as e:
out = e
assert str(out) == "The list of specified column names cannot be extracted from the file."

def test_extract_rawfile_unique_values(self, ):
data_Spectronaut_csv = os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv')
data_Spectronaut_tsv = os.path.join(TEST_FOLDER, 'test_spectronaut_input.tsv')
assert ['raw_01', 'raw_02'] == extract_rawfile_unique_values(data_Spectronaut_csv)
assert extract_rawfile_unique_values(data_Spectronaut_csv) == extract_rawfile_unique_values(data_Spectronaut_tsv)

data_MQ = os.path.join(TEST_FOLDER, 'test_maxquant_input.txt')
assert len(extract_rawfile_unique_values(data_MQ)) == 381

data_wrong = os.path.join(TEST_FOLDER, 'test_maxquant_imported.csv')
try:
out = extract_rawfile_unique_values(data_wrong)
except ValueError as e:
out = e
assert str(out) == "A column with the raw file names is not in the file."

data_fragpipe_combined = os.path.join(TEST_FOLDER, 'combined_peptide.txt')
assert ['Y731F1', 'Y731F2', 'wt1', 'wt2'] == extract_rawfile_unique_values(data_fragpipe_combined)

def test_import_spectronaut_data(self, ):
# test entire input test data
data = import_spectronaut_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'))
#print(data.shape[0])
assert data.shape[0] == 40
data_t = import_spectronaut_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.tsv'))
#print(data_t.shape[0])
pd.testing.assert_frame_equal(data, data_t)
test = pd.read_csv(os.path.join(TEST_FOLDER, 'test_spectronaut_imported.csv'), sep=',')
#print(test.shape[0])
pd.testing.assert_frame_equal(data, test)

# test single sample
data = import_spectronaut_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'),
sample="raw_01")
#print(data.shape[0])
assert data.shape[0] == 40
data = import_spectronaut_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'),
sample="raw_02")
#print(data.shape[0])
assert data.shape[0] == 20

# test multiple samples
data = import_spectronaut_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'),
sample=["raw_01","raw_02"])
#print(data.shape[0])
assert data.shape[0] == 40

def test_import_maxquant_data(self, ):
data = import_maxquant_data(os.path.join(TEST_FOLDER, 'test_maxquant_input.txt'))
test = pd.read_csv(os.path.join(TEST_FOLDER, 'test_maxquant_imported.csv'), sep=',')
pd.testing.assert_frame_equal(data, test)

data_s = import_maxquant_data(os.path.join(TEST_FOLDER, 'test_maxquant_input.txt'),
sample = "raw_1")
assert data_s.shape[0] == 85

data_s = import_maxquant_data(os.path.join(TEST_FOLDER, 'test_maxquant_input.txt'),
sample = "raw_2")
assert data_s.shape[0] == 77

data_s = import_maxquant_data(os.path.join(TEST_FOLDER, 'test_maxquant_input.txt'),
sample = ["raw_1", "raw_2"])
assert data_s.shape[0] == 136

def test_convert_ap_mq_mod(self, ):
# test oxidation
seq1 = 'HAEoxMVHTGLK'
assert "HAEM[Oxidation (M)]VHTGLK" == convert_ap_mq_mod(seq1)
seq2 = 'HAEoxPVHTGLK'
assert "HAEP[Oxidation (MP)]VHTGLK" == convert_ap_mq_mod(seq2)
# test acetylation
seq4 = 'aMDEPSPLAQPLELNQHSR'
assert "[Acetyl (N-term)]MDEPSPLAQPLELNQHSR" == convert_ap_mq_mod(seq4)
seq5 = 'MDEPSaKPLA'
assert "MDEPSK[Acetyl (K)]PLA" == convert_ap_mq_mod(seq5)
# test amidation
seq6 = 'MDEPSKPLamA'
assert "MDEPSKPLA[Amidated (C-term)]" == convert_ap_mq_mod(seq6)
# test deamidation
seq7 = 'MDEPSdeamNKPLA'
assert "MDEPSN[Deamidation (NQ)]KPLA" == convert_ap_mq_mod(seq7)
seq8 = 'MDEPSdeamQKPLA'
assert "MDEPSQ[Deamidation (NQ)]KPLA" == convert_ap_mq_mod(seq8)
# test phosporylation
seq9 = 'MDEPSpSKPLA'
assert "MDEPSS[Phospho (STY)]KPLA" == convert_ap_mq_mod(seq9)
# test pyro-Glu
seq10 = 'MDpgEPSNKPLA'
assert "MDE[Glu->pyro-Glu]PSNKPLA" == convert_ap_mq_mod(seq10)
seq11 = 'MDpgQPSNKPLA'
assert "MDQ[Gln->pyro-Glu]PSNKPLA" == convert_ap_mq_mod(seq11)
# test disylfide bonds
seq12 = 'cCVNTTLQIK'
assert "C[Cys-Cys]VNTTLQIK" == convert_ap_mq_mod(seq12)
seq1_several_dif_mods = 'AcCLDYPVTSVLPPASLoxMK'
assert "AC[Cys-Cys]LDYPVTSVLPPASLM[Oxidation (M)]K" == convert_ap_mq_mod(seq1_several_dif_mods)
seq2_several_same_mods = 'LFTToxMELoxMR'
assert "LFTTM[Oxidation (M)]ELM[Oxidation (M)]R" == convert_ap_mq_mod(seq2_several_same_mods)
seq3_several_same_mods = 'LFTToxMELoxMRoxM'
assert "LFTTM[Oxidation (M)]ELM[Oxidation (M)]RM[Oxidation (M)]" == convert_ap_mq_mod(seq3_several_same_mods)
seq4_several_same_mods = 'LFTTdeamNELdeamNR'
assert "LFTTN[Deamidation (NQ)]ELN[Deamidation (NQ)]R" == convert_ap_mq_mod(seq4_several_same_mods)
seq_no_mod = 'CVNTTLQIK'
assert "CVNTTLQIK" == convert_ap_mq_mod(seq_no_mod)

def test_import_alphapept_data(self, ):
data = import_alphapept_data(os.path.join(TEST_FOLDER, 'test_alphapept_input.csv'))
assert data.shape == (4228, 3)

data_s1 = import_alphapept_data(os.path.join(TEST_FOLDER, 'test_alphapept_input.csv'),
sample = "exp_1")
assert data_s1.shape[0] == 2127

data_s2 = import_alphapept_data(os.path.join(TEST_FOLDER, 'test_alphapept_input.csv'),
sample = "exp_2")
assert data_s2.shape[0] == 2101

data_s_both = import_alphapept_data(os.path.join(TEST_FOLDER, 'test_alphapept_input.csv'),
sample = ["exp_1", "exp_2"])
assert data_s_both.shape[0] == 4228

def test_convert_diann_mq_mod(self, ):
seq1 = 'VSHGSSPSLLEALSSDFLAC(UniMod:4)K'
assert 'VSHGSSPSLLEALSSDFLAC[Carbamidomethyl (C)]K' == convert_diann_mq_mod(seq1)
seq2 = 'VSVINTVDTSHEDMIHDAQM(UniMod:35)DYYGTR'
assert 'VSVINTVDTSHEDMIHDAQM[Oxidation (M)]DYYGTR' == convert_diann_mq_mod(seq2)
seq3 = 'HAEMPVHTGLK(UniMod:2)'
assert 'HAEMPVHTGLK[Amidated (C-term)]' == convert_diann_mq_mod(seq3)
seq4 = 'HAEMPVHTGLKS(UniMod:23)A'
assert 'HAEMPVHTGLKS[Dehydrated (ST)]A' == convert_diann_mq_mod(seq4)
seq5 = 'HAEMPVHTGLKY(UniMod:23)A'
assert 'HAEMPVHTGLKY[Dehydrated (Y)]A' == convert_diann_mq_mod(seq5)

seq1_several_dif_mods = '(UniMod:1)VSHGSSPSLLEALSSDFLAC(UniMod:4)K'
assert '[Acetyl (N-term)]VSHGSSPSLLEALSSDFLAC[Carbamidomethyl (C)]K' == convert_diann_mq_mod(seq1_several_dif_mods)
seq2_several_same_mods = 'CAALVATAEENLC(UniMod:4)C(UniMod:4)EELSSK'
assert 'CAALVATAEENLC[Carbamidomethyl (C)]C[Carbamidomethyl (C)]EELSSK' == convert_diann_mq_mod(seq2_several_same_mods)
seq_no_mod = 'CVNTTLQIK'
assert "CVNTTLQIK" == convert_diann_mq_mod(seq_no_mod)

def test_import_diann_data(self, ):
data = import_diann_data(os.path.join(TEST_FOLDER, 'test_diann_input.tsv'))
assert data.shape == (44, 3)

data_s1 = import_diann_data(os.path.join(TEST_FOLDER, 'test_diann_input.tsv'),
sample = "20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648")
assert data_s1.shape[0] == 39

data_s_both = import_diann_data(os.path.join(TEST_FOLDER, 'test_diann_input.tsv'),
sample = ["20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648",
"20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-A2_1_22636"])
assert data_s_both.shape[0] == 42

def test_convert_fragpipe_mq_mod(self, ):
seq1 = 'AAEREPPPLGDGKPTDFEDLEDGEDLFTSTVSTLE'
modif1 = 'N-term(42.0106)'
assert '[Acetyl (N-term)]AAEREPPPLGDGKPTDFEDLEDGEDLFTSTVSTLE' == convert_fragpipe_mq_mod(seq1, modif1)
seq2 = 'AAEVISDARENIQRFFGHGAEDSLADQAANEWGRSGKDPNHFRPAGLPEKY'
modif2 = 'C-term(-0.9840)'
assert 'AAEVISDARENIQRFFGHGAEDSLADQAANEWGRSGKDPNHFRPAGLPEKY[Amidated (C-term)]' == convert_fragpipe_mq_mod(seq2, modif2)
seq3 = 'QESQSEEIDCNDKDLFKA'
modif3 = '1Q(-17.0265)'
assert 'Q[Gln->pyro-Glu]ESQSEEIDCNDKDLFKA' == convert_fragpipe_mq_mod(seq3, modif3)
seq4 = 'EKPLLEKSHCIAEVENDEMPA'
modif4 = '1E(-18.0106)'
assert 'E[Glu->pyro-Glu]KPLLEKSHCIAEVENDEMPA' == convert_fragpipe_mq_mod(seq4, modif4)
seq5 = 'SKPLLEKSHCIAEVENDEMPA'
modif5 = '1S(-18.0106)'
assert 'S[Dehydrated (ST)]KPLLEKSHCIAEVENDEMPA' == convert_fragpipe_mq_mod(seq5, modif5)
seq6 = 'YKPLLEKSHCIAEVENDEMPA'
modif6 = '1Y(-18.0106)'
assert 'Y[Dehydrated (Y)]KPLLEKSHCIAEVENDEMPA' == convert_fragpipe_mq_mod(seq6, modif6)
seq1_several_dif_mods = 'AAAAECDVVMAATEPELLDDQEAK'
seq1_several_dif_mods_modifs = '10M(15.9949),6C(57.0215),N-term(42.0106)'
assert '[Acetyl (N-term)]AAAAEC[Carbamidomethyl (C)]DVVM[Oxidation (M)]AATEPELLDDQEAK' == convert_fragpipe_mq_mod(
seq1_several_dif_mods, seq1_several_dif_mods_modifs)
seq2_several_dif_mods = 'PGFSIADKKR'
seq2_several_dif_mods_modifs = '8K(114.0429),9K(114.0429)'
assert 'PGFSIADK[GlyGly (K)]K[GlyGly (K)]R' == convert_fragpipe_mq_mod(
seq2_several_dif_mods, seq2_several_dif_mods_modifs)
seq_no_mod = 'CVNTTLQIK'
seq_no_mod_modifs = ''
assert "CVNTTLQIK" == convert_fragpipe_mq_mod(seq_no_mod, seq_no_mod_modifs)

def test_import_fragpipe_data(self, ):
data = import_fragpipe_data(os.path.join(TEST_FOLDER, 'test_fragpipe_input.tsv'))
assert data.shape == (50, 3)

sample = 'Y731F1'
file = os.path.join(TEST_FOLDER, 'combined_peptide.txt')
assert import_fragpipe_data(file, sample).shape == (23, 3)

two_samples = ['Y731F1', 'Y731F2']
assert import_fragpipe_data(file, two_samples).shape == (26, 3)

def test_import_data(self, ):
data_MQ = import_data(os.path.join(TEST_FOLDER, 'test_maxquant_input.txt'), verbose=False)
test = pd.read_csv(os.path.join(TEST_FOLDER, 'test_maxquant_imported.csv'), sep=',')
pd.testing.assert_frame_equal(data_MQ, test)

data_S_csv = import_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'), verbose=False)
data_S_tsv = import_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.tsv'), verbose=False)
pd.testing.assert_frame_equal(data_S_csv, data_S_tsv)
test = pd.read_csv(os.path.join(TEST_FOLDER, 'test_spectronaut_imported.csv'), sep=',')
pd.testing.assert_frame_equal(data_S_csv, test)

data_S_sub = import_data(os.path.join(TEST_FOLDER, 'test_spectronaut_input.csv'),
sample = "raw_01",
verbose=False)
assert data_S_sub.shape[0] == 40

data_alphapept = import_data(os.path.join(TEST_FOLDER, 'test_alphapept_input.csv'),
sample = "exp_1",
verbose=False)
assert data_alphapept.shape[0] == 2127

data_diann = import_data(os.path.join(TEST_FOLDER, 'test_diann_input.tsv'),
sample = "20201218_tims03_Evo03_PS_SA_HeLa_200ng_high_speed_21min_8cm_S2-B2_1_22648",
verbose=False)
assert data_diann.shape[0] == 39

data_fragpipe = import_data(os.path.join(TEST_FOLDER, 'test_fragpipe_input.tsv'),
verbose=False)
assert data_fragpipe.shape[0] == 50

try:
out = import_data(os.path.join(TEST_FOLDER, 'test_uniprot_df.csv'))
except TypeError as e:
out = e
assert str(out) == "Input data format for "+os.path.join(TEST_FOLDER, 'test_uniprot_df.csv')+" not known."


if __name__ == "__main__":
unittest.main()

0 comments on commit 6a10f2a

Please sign in to comment.