Skip to content

Commit

Permalink
fixup! Add 3Di substitution matrix
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Oct 15, 2024
1 parent 977e44e commit d8be3a2
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 18 deletions.
44 changes: 26 additions & 18 deletions src/biotite/sequence/align/matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
# under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
# information.

__all__ = ["SubstitutionMatrix"]
__name__ = "biotite.sequence.align"
__author__ = "Patrick Kunzmann"

import os
import functools
from pathlib import Path
import numpy as np
from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence

__all__ = ["SubstitutionMatrix"]
# Directory of matrix files
_DB_DIR = Path(__file__).parent / "matrix_data"


class SubstitutionMatrix(object):
Expand Down Expand Up @@ -128,9 +131,6 @@ class SubstitutionMatrix(object):
>>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
"""

# Directory of matrix files
_db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")

def __init__(self, alphabet1, alphabet2, score_matrix):
self._alph1 = alphabet1
self._alph2 = alphabet2
Expand Down Expand Up @@ -354,7 +354,7 @@ def dict_from_db(matrix_name):
matrix_dict : dict
A dictionary representing the substitution matrix.
"""
filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
filename = _DB_DIR / f"{matrix_name}.mat"
with open(filename, "r") as f:
return SubstitutionMatrix.dict_from_str(f.read())

Expand All @@ -368,11 +368,10 @@ def list_db():
db_list : list
List of matrix names in the internal database.
"""
files = os.listdir(SubstitutionMatrix._db_dir)
# Remove '.mat' from files
return [file[:-4] for file in sorted(files)]
return [path.stem for path in _DB_DIR.glob("*.mat")]

@staticmethod
@functools.cache
def std_protein_matrix():
"""
Get the default :class:`SubstitutionMatrix` for protein sequence
Expand All @@ -383,9 +382,10 @@ def std_protein_matrix():
matrix : SubstitutionMatrix
Default matrix.
"""
return _matrix_blosum62
return ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"

@staticmethod
@functools.cache
def std_nucleotide_matrix():
"""
Get the default :class:`SubstitutionMatrix` for DNA sequence
Expand All @@ -396,13 +396,21 @@ def std_nucleotide_matrix():
matrix : SubstitutionMatrix
Default matrix.
"""
return _matrix_nuc
return NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"

@staticmethod
@functools.cache
def std_3di_matrix():
"""
Get the default :class:`SubstitutionMatrix` for 3Di sequence
alignments.

Returns
-------
matrix : SubstitutionMatrix
Default matrix.
"""
# Import inside function to avoid circular import
from biotite.structure.alphabet.i3d import I3DSequence

# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
_matrix_blosum62 = SubstitutionMatrix(
ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
)
_matrix_nuc = SubstitutionMatrix(
NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
)
return I3DSequence.alphabet, I3DSequence.alphabet, "3Di"
15 changes: 15 additions & 0 deletions tests/sequence/align/test_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,21 @@ def test_structural_alphabet_matrices(matrix_name, alphabet):
align.SubstitutionMatrix(alphabet, alphabet, matrix_name)


@pytest.mark.parametrize(
    "method_name",
    [
        "std_protein_matrix",
        "std_nucleotide_matrix",
        "std_3di_matrix",
    ],
)
def test_default_matrices(method_name):
    """
    Check that each static method for obtaining a default matrix runs
    without raising an exception and returns a
    :class:`SubstitutionMatrix`, as its docstring states.
    """
    # Look up the factory by name, so that a missing method fails
    # with an AttributeError for exactly that parameter
    factory = getattr(align.SubstitutionMatrix, method_name)
    matrix = factory()
    # Merely not raising is insufficient: a factory that returns
    # anything but a matrix would otherwise go unnoticed
    assert isinstance(matrix, align.SubstitutionMatrix)


def test_matrix_str():
"""
Test conversion of substitution matrix to string via a small
Expand Down

0 comments on commit d8be3a2

Please sign in to comment.