Skip to content

Commit

Permalink
fixed standard numbering
Browse files Browse the repository at this point in the history
  • Loading branch information
alacheim committed Jun 20, 2024
1 parent 725b8c9 commit 3f935fa
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 5 deletions.
81 changes: 81 additions & 0 deletions pyeed/core/alignmentresult.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

from .sequence import Sequence
from .standardnumbering import StandardNumbering
from pyeed.core.numberedsequence import NumberedSequence


class AlignmentResult(
Expand Down Expand Up @@ -161,3 +162,83 @@ def visualize(self):
)
mv.plotfig()
os.remove(temp_file)


@staticmethod
#do we get correct results
def _get_numbering_string(reference: str, query: str) -> List[str]:
"""
Assigns pairwise numbering to the reference and query sequences.
Args:
reference (str): The reference sequence.
query (str): The query sequence.
Returns:
List[str]: A list of pairwise numbering.
"""

numbering = []
reference_counter = 0
query_counter = 1

for ref_pos, que_pos in zip(reference, query):

if ref_pos == "-":
numbering.append(f"{reference_counter}.{query_counter}")
query_counter += 1
else:
reference_counter += 1
if que_pos != "-":
numbering.append(f"{reference_counter}.{query_counter}")
query_counter += 1
else:
numbering.append(str(reference_counter))

print(numbering)
return numbering


def apply_standard_numbering(
    self,
    reference: "Optional[Sequence]" = None,
):
    """
    Apply standard numbering to the aligned sequences.

    For every entry of ``self.aligned_sequences`` a numbering is computed
    against the reference via ``_get_numbering_string`` and wrapped in a
    ``NumberedSequence``. The result is stored on
    ``self.standard_numbering`` as a ``StandardNumbering`` object; nothing
    is returned.

    Args:
        reference (Sequence, optional): The reference sequence to use for
            numbering. Must be one of ``self.aligned_sequences``. If not
            provided, the first aligned sequence is used as the reference.
            Defaults to None.

    Raises:
        ValueError: If the sequences are not aligned, or if ``reference``
            is given but is not part of the aligned sequences.
    """
    if not self.aligned_sequences:
        raise ValueError(
            "Sequences must be aligned first. Run the align() method first."
        )

    if reference is None:
        # Documented default: fall back to the first aligned sequence.
        reference = self.aligned_sequences[0]
    elif reference not in self.aligned_sequences:
        raise ValueError(
            "Reference Sequence is not part of the aligned sequences. Please choose a new reference sequence!"
        )

    numbered_sequences = [
        NumberedSequence(
            numbered_id=aligned_sequence.sequence_id,
            numbering=self._get_numbering_string(
                reference=reference.sequence,  # type: ignore
                query=aligned_sequence.sequence,  # type: ignore
            ),
        )
        for aligned_sequence in self.aligned_sequences
    ]

    self.standard_numbering = StandardNumbering(
        reference_id=reference.sequence_id,
        numbered_sequences=numbered_sequences,
    )
13 changes: 8 additions & 5 deletions pyeed/core/standardnumbering.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@ class StandardNumbering(
json_schema_extra=dict(),
)

numberd_sequences: List[NumberedSequence] = element(
numbered_sequences: List[NumberedSequence] = element(
description="Numbered sequence of the aligned sequence",
default_factory=ListPlus,
tag="numberd_sequences",
tag="numbered_sequences",
json_schema_extra=dict(
multiple=True,
),
Expand All @@ -59,7 +59,7 @@ def _parse_raw_xml_data(self):

return self

def add_to_numberd_sequences(
def add_to_numbered_sequences(
self,
numbered_id: Optional[str] = None,
numbering: List[str] = ListPlus(),
Expand All @@ -85,6 +85,9 @@ def add_to_numberd_sequences(

obj = NumberedSequence(**params)

self.numberd_sequences.append(obj)
self.numbered_sequences.append(obj)

return self.numberd_sequences[-1]
return self.numbered_sequences[-1]



0 comments on commit 3f935fa

Please sign in to comment.