Skip to content

Commit

Permalink
added docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
haeussma committed May 10, 2024
1 parent 05a0306 commit d9135db
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 56 deletions.
6 changes: 3 additions & 3 deletions examples/basics.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "b8add0338418422da23a9aed0f7164fa",
"model_id": "f9aacdf51dc04cc0b19f3e296aa52dbb",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -12163,7 +12163,7 @@
" ...]"
]
},
"execution_count": 24,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
Expand Down
89 changes: 36 additions & 53 deletions pyeed/align/pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@
from Bio.Align import PairwiseAligner as BioPairwiseAligner
from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix
from joblib import Parallel, cpu_count, delayed
from rich.console import Console
from rich.progress import Progress

from pyeed.core.pairwisealignmentresult import PairwiseAlignmentResult


class PairwiseAligner:
def __init__(self, mode: str) -> None:
def __init__(
self,
mode: str = "global",
match: int = 1,
mismatch: int = -1,
gap_open: int = -1,
gap_exted: int = 0,
substitution_matrix: str = "None",
) -> None:
self.mode = mode
self.match = 1
self.mismatch = -1
self.gap_open = -1
self.gap_extend = 0
self.substitution_matrix = "None"
self.match = match
self.mismatch = mismatch
self.gap_open = gap_open
self.gap_extend = gap_exted
self.substitution_matrix = substitution_matrix

def _align(
self,
Expand Down Expand Up @@ -47,36 +52,41 @@ def align_pairwise(
self,
seq1: Dict[str, str],
seq2: Dict[str, str],
) -> PairwiseAlignmentResult:
) -> dict:
"""Aligns two sequences and returns the alignment results.
Args:
seq1 (Dict[str, str]): Sequence 1 to align. Key is the sequence ID.
seq2 (Dict[str, str]): Sequence 2 to align. Key is the sequence ID.
Returns:
PairwiseAlignmentResult: The alignment results.
dict: Has the same signature as a `PairwiseAlignmentResult` object.
"""

alignment_result = self._align(seq1, seq2)

return self._map_alignment_results(alignment_result, seq1, seq2)

def align_multipairwise(self, sequences: Dict[str, str], **kwargs) -> List[dict]:
""" """
# the sequences are stored in a dictionary, key = source.id value = sequence
# this creates a list of all possible pairs of sequences, on this list the alignment has to happen
pairs = list(combinations(sequences.keys(), 2))
"""Creates all possible pairwise alignments from a dictionary of sequences.
progress_bar = Progress(console=Console())
Args:
sequences (Dict[str, str]): A dictionary of sequences to align. The key is the sequence ID.
alignments = []
with progress_bar as progress:
Returns:
List[dict]: A list of dictionaries containing the alignment results.
"""

pairs = list(combinations(sequences.keys(), 2))

with Progress() as progress:
alignments = Parallel(n_jobs=cpu_count(), prefer="processes")(
delayed(self.align_pairwise)(
{pair[0]: sequences[pair[0]]}, {pair[1]: sequences[pair[1]]}
)
for pair in progress.track(pairs, description="⛓️ Aligning sequences...")
for pair in progress.track(
pairs, description=f"⛓️ Aligning {len(pairs)} sequence pairs..."
)
)

return alignments
Expand All @@ -98,9 +108,13 @@ def _get_aligner(self) -> BioPairwiseAligner:

def _map_alignment_results(
self, alignment: Alignment, seq1: Dict[str, str], seq2: Dict[str, str]
) -> PairwiseAlignmentResult:
# this maps the alignment results to a dictionary
# this dictionary is used to create the network graph
) -> dict:
"""Maps the alignment results to a dictionary.
The dictionaly has the same signature as a `PairwiseAlignmentResult` object.
Returns:
dict: A dictionary containing the alignment results.
"""

shorter_seq = min(alignment[0], alignment[1], key=lambda x: len(x))

Expand Down Expand Up @@ -128,40 +142,9 @@ def _map_alignment_results(
"aligned_sequences": aligned_sequences,
}

# alignment_dic = {
# "seq1": alignment[0],
# "seq2": alignment[1],
# "seq1_id": seq1_id,
# "seq2_id": seq2_id,
# "score": alignment.score,
# "mismatches": 1 / (alignment.counts().mismatches + 1),
# "gaps": 1 / (alignment.counts().gaps + 1),
# "identity": identity,
# "start": alignment.aligned[0],
# "end": alignment.aligned[1],
# }

return result_dict

def _load_substitution_matrix(self) -> "BioSubstitutionMatrix":
from Bio.Align import substitution_matrices

return substitution_matrices.load(self.substitution_matrix)


if __name__ == "__main__":
aligner = PairwiseAligner(mode="global")
seq1 = dict(s1="ATCG")
seq2 = dict(s2="ATCC")
seq3 = dict(s3="GGCC")
seq4 = dict(s4="GGGC")

all_seq = {**seq1, **seq2, **seq3, **seq4}

print(aligner.align_pairwise(seq1=seq1, seq2=seq2))

alignment_result = aligner.align_multipairwise(sequences=all_seq)

print(alignment_result)

print(alignment_result)

0 comments on commit d9135db

Please sign in to comment.