From 3f935fa6eb05f78811ec5ad8ed5ce3b9ce5e7207 Mon Sep 17 00:00:00 2001
From: Alina Lacheim
Date: Thu, 20 Jun 2024 13:34:55 +0200
Subject: [PATCH] fixed standard numbering

---
 pyeed/core/alignmentresult.py   | 81 +++++++++++++++++++++++++++++++++
 pyeed/core/standardnumbering.py | 10 ++--
 2 files changed, 86 insertions(+), 5 deletions(-)

diff --git a/pyeed/core/alignmentresult.py b/pyeed/core/alignmentresult.py
index f1a07feb..df6741f8 100644
--- a/pyeed/core/alignmentresult.py
+++ b/pyeed/core/alignmentresult.py
@@ -12,6 +12,7 @@
 
 from .sequence import Sequence
 from .standardnumbering import StandardNumbering
+from .numberedsequence import NumberedSequence
 
 
 class AlignmentResult(
@@ -161,3 +162,83 @@ def visualize(self):
         )
         mv.plotfig()
         os.remove(temp_file)
+
+    @staticmethod
+    def _get_numbering_string(reference: str, query: str) -> List[str]:
+        """
+        Assigns pairwise numbering to the reference and query sequences.
+
+        Both strings are walked column by column ("-" marks a gap). A
+        column is labelled "<reference count>.<query count>", or with the
+        reference count alone when only the query has a gap.
+
+        Args:
+            reference (str): The reference sequence.
+            query (str): The query sequence.
+
+        Returns:
+            List[str]: A list of pairwise numbering.
+        """
+        numbering = []
+        reference_counter = 0
+        query_counter = 1
+
+        for ref_pos, que_pos in zip(reference, query):
+            if ref_pos == "-":
+                # Gap in the reference: reuse the last reference count.
+                numbering.append(f"{reference_counter}.{query_counter}")
+                query_counter += 1
+            else:
+                reference_counter += 1
+                if que_pos != "-":
+                    numbering.append(f"{reference_counter}.{query_counter}")
+                    query_counter += 1
+                else:
+                    numbering.append(str(reference_counter))
+
+        return numbering
+
+    def apply_standard_numbering(
+        self,
+        reference: Sequence,
+    ):
+        """
+        Apply standard numbering to the aligned sequences.
+
+        The result is stored in ``self.standard_numbering``.
+
+        Args:
+            reference (Sequence): The reference sequence to use for numbering.
+                It must be one of the aligned sequences.
+
+        Raises:
+            ValueError: If the sequences are not aligned, or if the reference
+                is not part of the aligned sequences.
+        """
+        if not self.aligned_sequences:
+            raise ValueError(
+                "Sequences must be aligned first. Run the align() method first."
+            )
+
+        if reference not in self.aligned_sequences:
+            raise ValueError(
+                "Reference Sequence is not part of the aligned sequences. Please choose a new reference sequence!"
+            )
+
+        numbered_sequence = []
+        for aligned_sequence in self.aligned_sequences:
+            numbering = self._get_numbering_string(
+                reference=reference.sequence, query=aligned_sequence.sequence #type:ignore
+            )
+
+            numbered_sequence.append(
+                NumberedSequence(
+                    numbered_id=aligned_sequence.sequence_id,
+                    numbering=numbering,
+                )
+            )
+
+        self.standard_numbering = StandardNumbering(
+            reference_id=reference.sequence_id,
+            numbered_sequences=numbered_sequence,
+        )
diff --git a/pyeed/core/standardnumbering.py b/pyeed/core/standardnumbering.py
index 95729740..07a13983 100644
--- a/pyeed/core/standardnumbering.py
+++ b/pyeed/core/standardnumbering.py
@@ -31,10 +31,10 @@ class StandardNumbering(
         json_schema_extra=dict(),
     )
 
-    numberd_sequences: List[NumberedSequence] = element(
+    numbered_sequences: List[NumberedSequence] = element(
         description="Numbered sequence of the aligned sequence",
         default_factory=ListPlus,
-        tag="numberd_sequences",
+        tag="numbered_sequences",
         json_schema_extra=dict(
             multiple=True,
         ),
@@ -59,7 +59,7 @@ def _parse_raw_xml_data(self):
 
         return self
 
-    def add_to_numberd_sequences(
+    def add_to_numbered_sequences(
         self,
         numbered_id: Optional[str] = None,
         numbering: List[str] = ListPlus(),
@@ -85,6 +85,6 @@ def add_to_numberd_sequences(
 
         obj = NumberedSequence(**params)
 
-        self.numberd_sequences.append(obj)
+        self.numbered_sequences.append(obj)
 
-        return self.numberd_sequences[-1]
+        return self.numbered_sequences[-1]