From d8be3a273cdbb5943555d67515f529a97a793827 Mon Sep 17 00:00:00 2001
From: Patrick Kunzmann
Date: Tue, 15 Oct 2024 17:50:29 +0200
Subject: [PATCH] fixup! Add 3Di substitution matrix

---
Review notes (ignored by `git am`, not part of the commit message):
* The std_*_matrix() accessors construct (and cache) a SubstitutionMatrix,
  as their docstrings state, instead of returning the bare constructor
  arguments as a tuple.
* list_db() sorts the matrix names again, as the replaced os.listdir()
  based code did; Path.glob() gives no ordering guarantee.
* test_default_matrices additionally asserts on the returned type, so a
  non-matrix return value is caught.

 src/biotite/sequence/align/matrix.py | 48 +++++++++++++++++-----------
 tests/sequence/align/test_matrix.py  | 16 ++++++++++
 2 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/src/biotite/sequence/align/matrix.py b/src/biotite/sequence/align/matrix.py
index bfdbc52a7..6afee019f 100644
--- a/src/biotite/sequence/align/matrix.py
+++ b/src/biotite/sequence/align/matrix.py
@@ -2,14 +2,17 @@
 # under the 3-Clause BSD License. Please see 'LICENSE.rst' for further
 # information.
 
+__all__ = ["SubstitutionMatrix"]
 __name__ = "biotite.sequence.align"
 __author__ = "Patrick Kunzmann"
 
-import os
+import functools
+from pathlib import Path
 import numpy as np
 from biotite.sequence.seqtypes import NucleotideSequence, ProteinSequence
 
-__all__ = ["SubstitutionMatrix"]
+# Directory of matrix files
+_DB_DIR = Path(__file__).parent / "matrix_data"
 
 
 class SubstitutionMatrix(object):
@@ -128,9 +131,6 @@ class SubstitutionMatrix(object):
     >>> matrix = SubstitutionMatrix(alph, alph, "BLOSUM50")
     """
 
-    # Directory of matrix files
-    _db_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "matrix_data")
-
     def __init__(self, alphabet1, alphabet2, score_matrix):
         self._alph1 = alphabet1
         self._alph2 = alphabet2
@@ -354,7 +354,7 @@ def dict_from_db(matrix_name):
         matrix_dict : dict
             A dictionary representing the substitution matrix.
         """
-        filename = SubstitutionMatrix._db_dir + os.sep + matrix_name + ".mat"
+        filename = _DB_DIR / f"{matrix_name}.mat"
         with open(filename, "r") as f:
             return SubstitutionMatrix.dict_from_str(f.read())
 
@@ -368,11 +368,10 @@ def list_db():
         db_list : list
             List of matrix names in the internal database.
         """
-        files = os.listdir(SubstitutionMatrix._db_dir)
-        # Remove '.mat' from files
-        return [file[:-4] for file in sorted(files)]
+        return sorted(path.stem for path in _DB_DIR.glob("*.mat"))
 
     @staticmethod
+    @functools.cache
     def std_protein_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for protein sequence
@@ -383,9 +382,12 @@ def std_protein_matrix():
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_blosum62
+        return SubstitutionMatrix(
+            ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
+        )
 
     @staticmethod
+    @functools.cache
     def std_nucleotide_matrix():
         """
         Get the default :class:`SubstitutionMatrix` for DNA sequence
@@ -396,13 +398,23 @@ def std_nucleotide_matrix():
         matrix : SubstitutionMatrix
             Default matrix.
         """
-        return _matrix_nuc
+        return SubstitutionMatrix(
+            NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
+        )
+
+    @staticmethod
+    @functools.cache
+    def std_3di_matrix():
+        """
+        Get the default :class:`SubstitutionMatrix` for 3Di sequence
+        alignments.
 
+        Returns
+        -------
+        matrix : SubstitutionMatrix
+            Default matrix.
+        """
+        # Import inside function to avoid circular import
+        from biotite.structure.alphabet.i3d import I3DSequence
 
-# Preformatted BLOSUM62 and NUC substitution matrix from NCBI
-_matrix_blosum62 = SubstitutionMatrix(
-    ProteinSequence.alphabet, ProteinSequence.alphabet, "BLOSUM62"
-)
-_matrix_nuc = SubstitutionMatrix(
-    NucleotideSequence.alphabet_amb, NucleotideSequence.alphabet_amb, "NUC"
-)
+        return SubstitutionMatrix(I3DSequence.alphabet, I3DSequence.alphabet, "3Di")
diff --git a/tests/sequence/align/test_matrix.py b/tests/sequence/align/test_matrix.py
index 05bb18acf..9455a1650 100644
--- a/tests/sequence/align/test_matrix.py
+++ b/tests/sequence/align/test_matrix.py
@@ -39,6 +39,22 @@ def test_structural_alphabet_matrices(matrix_name, alphabet):
     align.SubstitutionMatrix(alphabet, alphabet, matrix_name)
 
 
+@pytest.mark.parametrize(
+    "method_name",
+    [
+        "std_protein_matrix",
+        "std_nucleotide_matrix",
+        "std_3di_matrix",
+    ],
+)
+def test_default_matrices(method_name):
+    """
+    Test that the static methods for getting default matrices return a matrix.
+    """
+    matrix = getattr(align.SubstitutionMatrix, method_name)()
+    assert isinstance(matrix, align.SubstitutionMatrix)
+
+
 def test_matrix_str():
     """
     Test conversion of substitution matrix to string via a small