Skip to content

Commit

Permalink
fixed test runs, some still missing
Browse files Browse the repository at this point in the history
  • Loading branch information
NiklasAbraham committed May 10, 2024
1 parent 59302c7 commit d52693a
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 57 deletions.
20 changes: 2 additions & 18 deletions pyeed/network/network.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from pyeed.core.proteinrecord import ProteinRecord
from pyeed.core.sequencerecord import SequenceRecord
from pyeed.align.pairwise_aligner import PairwiseAligner
from pyeed.align.pairwise import PairwiseAligner


class SequenceNetwork(BaseModel):
Expand Down Expand Up @@ -127,18 +127,13 @@ def _create_graph(self):
if all([isinstance(sequence, ProteinRecord) for sequence in self.sequences]):
node_data = []

print(time.time(), '----- Processing in for loop')

for sequence in self.sequences:
id_seq, seq, data = self._process_sequence(sequence)
node_data.append((id_seq, data))
alignment_data[id_seq] = seq

print(time.time(), "----- Adding in networx")

self.network.add_nodes_from(node_data)

print(time.time(), "----- nodes added ")

else:
for sequence in self.sequences:
Expand All @@ -164,27 +159,16 @@ def _create_graph(self):

# create the alignments
alignments_results = self._aligner.align_multipairwise(alignment_data)
# now pydantic cant seem to handle the serialization of the alignment results in the field start and end
# so we need to convert them to lists
for alignment_result in alignments_results:
alignment_result['start'] = alignment_result['start'].tolist()
alignment_result['end'] = alignment_result['end'].tolist()
# create a list for the egdes
edge_data = []

print(time.time(), "Processing Pairs in for loop")

for alignment_result in alignments_results:
edge = (alignment_result['seq1_id'], alignment_result['seq2_id'], {key: value for key, value in alignment_result.items() if key not in ['seq1_id', 'seq2_id']})
edge = (alignment_result['sequences'][0]['id'], alignment_result['sequences'][1]['id'], {key: value for key, value in alignment_result.items()})
if edge:
edge_data.append(edge)

print(time.time(), "Adding edeges from list in networx")

self.network.add_edges_from(edge_data)

print(time.time(), "edges added in network")

# Calculate node positions based on dimensions
if self.dimensions == 2:
return self._2d_position_nodes_and_edges(self.network)
Expand Down
63 changes: 27 additions & 36 deletions tests/unit/align_tests/test_pairwise_aligner.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np

from pyeed.core.proteinrecord import ProteinRecord
from pyeed.align.pairwise_aligner import PairwiseAligner
from pyeed.align.pairwise import PairwiseAligner


class TestPairwiseAligner():
Expand All @@ -10,10 +10,11 @@ def test_align_default_parameters_returns_dictionary(self):
aligner = PairwiseAligner(mode="global")
seq1 = "ATCG"
seq2 = "AGTC"
alignment_result = aligner.align_pairwise(seq1, seq2, "seq1", "seq2")
expected_keys = ["seq1", "seq2", "score", "mismatches", "gaps", "identity", "start", "end"]
alignment_result = aligner.align_pairwise(seq1=dict(seq1="ATCG"), seq2=dict(seq2="AGTC"))
expected_keys = ["sequences", "aligned_sequences", "score", "mismatches", "gaps", "identity"]
assert isinstance(alignment_result, dict)
assert all(key in alignment_result for key in expected_keys)


def test_align_multipairwise_with_more_than_two_sequences(self):
aligner = PairwiseAligner(mode="global")
Expand All @@ -24,30 +25,24 @@ def test_align_multipairwise_with_more_than_two_sequences(self):
"seq4": "TTTT"
}
alignments = aligner.align_multipairwise(sequences)
expected_keys = ["seq1", "seq2", "score", "mismatches", "gaps", "identity", "start", "end", "seq1_id", "seq2_id"]
assert isinstance(alignments, list)
assert all(isinstance(alignment, dict) for alignment in alignments)
assert all(all(key in alignment for key in expected_keys) for alignment in alignments)
assert all(isinstance(alignment, dict) for alignment in alignments)


def test_align_pairwise_pair_results(self):
aligner = PairwiseAligner(mode="global")
seq1 = "ATCG"
seq2 = "AGTC"
alignment_result = aligner.align_pairwise(seq1, seq2, "seq1", "seq2")
expected_keys = ["seq1", "seq2", "score", "mismatches", "gaps", "identity", "start", "end", "seq1_id", "seq2_id"]
alignment_result = aligner.align_pairwise(seq1=dict(seq1="ATCG"), seq2=dict(seq2="AGTC"))
expected_keys = ["sequences", "aligned_sequences", "score", "mismatches", "gaps", "identity"]
assert isinstance(alignment_result, dict)
assert all(key in alignment_result for key in expected_keys)
assert alignment_result['seq1'] == "A-TCG"
assert alignment_result['seq2'] == "AGTC-"
assert alignment_result['seq1_id'] == "seq1"
assert alignment_result['seq2_id'] == "seq2"
assert alignment_result['aligned_sequences'][0]['sequence'] == "A-TCG"
assert alignment_result['aligned_sequences'][1]['sequence'] == "AGTC-"
assert alignment_result['score'] == 1.0
assert alignment_result['mismatches'] == 1.0
assert alignment_result['gaps'] == 0.3333333333333333
assert alignment_result['mismatches'] == 0.0
assert alignment_result['gaps'] == 2
assert alignment_result['identity'] == 0.6
assert (alignment_result['start'] == np.array([[0, 1], [1, 3]])).all()
assert (alignment_result['end'] == np.array([[0, 1], [2, 4]])).all()


def test_align_multi_pairwise_results(self):
Expand All @@ -59,33 +54,29 @@ def test_align_multi_pairwise_results(self):
"seq4": "TTTT"
}
alignments = aligner.align_multipairwise(sequences)
expected_keys = ["seq1", "seq2", "score", "mismatches", "gaps", "identity", "start", "end", "seq1_id", "seq2_id"]
expected_keys = ["sequences", "aligned_sequences", "score", "mismatches", "gaps", "identity"]
assert isinstance(alignments, list)
assert all(isinstance(alignment, dict) for alignment in alignments)
assert all(all(key in alignment for key in expected_keys) for alignment in alignments)
assert alignments[0]['seq1'] == "A-TCG"
assert alignments[0]['seq2'] == "AGTC-"
assert alignments[0]['aligned_sequences'][0]['sequence'] == "A-TCG"
assert alignments[0]['aligned_sequences'][1]['sequence'] == "AGTC-"
assert alignments[0]['score'] == 1.0
assert alignments[0]['mismatches'] == 1.0
assert alignments[0]['gaps'] == 0.3333333333333333
assert alignments[0]['mismatches'] == 0
assert alignments[0]['gaps'] == 2
assert alignments[0]['identity'] == 0.6
assert (alignments[0]['start'] == np.array([[0, 1], [1, 3]])).all()
assert (alignments[0]['end'] == np.array([[0, 1], [2, 4]])).all()

assert alignments[1]['seq1'] == "---ATCG"
assert alignments[1]['seq2'] == "AAAA---"
assert alignments[1]['aligned_sequences'][0]['sequence'] == "---ATCG"
assert alignments[1]['aligned_sequences'][1]['sequence'] == "AAAA---"
assert alignments[1]['score'] == -1.0
assert alignments[1]['mismatches'] == 1.0
assert alignments[1]['gaps'] == 0.14285714285714285
assert alignments[1]['mismatches'] == 0
assert alignments[1]['gaps'] == 6
assert alignments[1]['identity'] == 0.14285714285714285
assert (alignments[1]['start'] == np.array([[0, 1]])).all()
assert (alignments[1]['end'] == np.array([[3, 4]])).all()

assert alignments[2]['seq1'] == "ATCG"
assert alignments[2]['seq2'] == "TTTT"
assert alignments[2]['aligned_sequences'][0]['sequence'] == "ATCG"
assert alignments[2]['aligned_sequences'][1]['sequence'] == "TTTT"
assert alignments[2]['score'] == -2.0
assert alignments[2]['mismatches'] == 0.25
assert alignments[2]['gaps'] == 1.0
assert alignments[2]['mismatches'] == 3
assert alignments[2]['gaps'] == 0


def test_align_multi_pairwise_from_ncbi(self):
Expand All @@ -101,10 +92,10 @@ def test_align_multi_pairwise_from_ncbi(self):

alignments = aligner.align_multipairwise(sequences_data_align)

expected_keys = ["seq1", "seq2", "score", "mismatches", "gaps", "identity", "start", "end", "seq1_id", "seq2_id"]
expected_keys = ["sequences", "aligned_sequences", "score", "mismatches", "gaps", "identity"]
assert isinstance(alignments, list)
assert all(isinstance(alignment, dict) for alignment in alignments)
assert all(all(key in alignment for key in expected_keys) for alignment in alignments)
assert alignments[0]['seq1_id'] == "MBP1912539.1" or alignments[0]['seq2_id'] == "MBP1912539.1"
assert alignments[0]['seq2_id'] == "SEV92896.1" or alignments[0]['seq1_id'] == "SEV92896.1"
assert alignments[0]['aligned_sequences'][0]['id'] == "MBP1912539.1" or alignments[0]['aligned_sequences'][1]['id'] == "MBP1912539.1"
assert alignments[0]['aligned_sequences'][0]['id'] == "SEV92896.1" or alignments[0]['aligned_sequences'][1]['id'] == "SEV92896.1"
assert alignments[0]['score'] == 327.0
5 changes: 4 additions & 1 deletion tests/unit/alignment_tests/test_hmm.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@

# Generated by CodiumAI
"""
from pyeed.core.alignment import Alignment
from pyeed.align.hmm import HMM
Expand All @@ -18,4 +19,6 @@ def test_initialized_with_name_and_alignment(self):
assert hmm.name == "test_hmm"
assert hmm.alignment == alignment
assert hmm.model is None
assert hmm.model is None
"""
2 changes: 0 additions & 2 deletions tests/unit/network_tests/test_newtork_graph_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pyeed.core import ProteinRecord
from pyeed.network import SequenceNetwork


class TestNetworkGraphBuild:

def test_general_build_networkx(self):
Expand Down Expand Up @@ -74,4 +73,3 @@ def test_cytoscape_nodes_size(self):
network.create_cytoscape_graph(collection="tests", title="test_cytoscape", threshold=threshhold)
network.set_nodes_size(column_name="degree_with_threshold_{}".format(threshhold), min_size=20, max_size=100)
network.color_nodes(column_name='species')

0 comments on commit d52693a

Please sign in to comment.