diff --git a/doc/examples/scripts/structure/ku_superimposition.py b/doc/examples/scripts/structure/ku_superimposition.py index 7eb5a91a9..0913a206d 100644 --- a/doc/examples/scripts/structure/ku_superimposition.py +++ b/doc/examples/scripts/structure/ku_superimposition.py @@ -47,7 +47,7 @@ ) # We do not want the cropped structures # -> apply superimposition on original structures -ku_superimposed = struc.superimpose_apply(ku, transformation) +ku_superimposed = transformation.apply(ku) # Write PDBx files as input for PyMOL cif_file = pdbx.PDBxFile() pdbx.set_structure(cif_file, ku_dna, data_block="ku_dna") diff --git a/doc/examples/scripts/structure/md_analysis.py b/doc/examples/scripts/structure/md_analysis.py index 7d02d9588..8c1b46139 100644 --- a/doc/examples/scripts/structure/md_analysis.py +++ b/doc/examples/scripts/structure/md_analysis.py @@ -71,9 +71,9 @@ # For this purpose we take the RMSD of a frame compared to the initial # model as measure. In order to calculate the RMSD we must # superimpose all models onto a reference, in this case we also choose -# the initial structure. +# the initial structure. -trajectory, transform = struc.superimpose(trajectory[0], trajectory) +trajectory, _ = struc.superimpose(trajectory[0], trajectory) rmsd = struc.rmsd(trajectory[0], trajectory) figure = plt.figure(figsize=(6,3)) @@ -90,7 +90,7 @@ # As we can see the simulation seems to converge already early in the # simulation. # After a about 200 ps the RMSD stays in a range of approx. 1 - 2 Å. -# +# # In order to futher evaluate the unfolding of our enzyme in the # course of simulation, we calculate and plot the radius of gyration # (a measure for the protein radius). @@ -110,7 +110,7 @@ # From this perspective, the protein seems really stable. # The radius does merely fluctuate in a range of approximately 0.3 Å # during the entire simulation. -# +# # Let's have a look at single amino acids: # Which residues fluctuate most? # For answering this question we calculate the RMSF @@ -120,7 +120,7 @@ # each residue. # Usually the average model is taken as reference # (compared to the starting model for RMSD). -# +# # Since side chain atoms fluctuate quite a lot, they are not suitable # for evaluation of the residue flexibility. Therefore, we consider only # CA atoms. diff --git a/doc/examples/scripts/structure/peptide_assembly.py b/doc/examples/scripts/structure/peptide_assembly.py index 7e6be4faf..6a9da1bff 100644 --- a/doc/examples/scripts/structure/peptide_assembly.py +++ b/doc/examples/scripts/structure/peptide_assembly.py @@ -223,7 +223,7 @@ def assemble_peptide(sequence): backbone_coord[3*i : 3*i + 3], residue.coord[np.isin(residue.atom_name, ["N", "CA", "C"])] ) - residue = struc.superimpose_apply(residue, transformation) + residue = transformation.apply(residue) chain = append_residue(chain, residue) @@ -241,9 +241,7 @@ def assemble_peptide(sequence): chain.coord[[ca_i, c_i, n_i]], peptide_coord[:3] ) - chain.coord[[o_i, h_i]] = struc.superimpose_apply( - peptide_coord[3:], transformation - ) + chain.coord[[o_i, h_i]] = transformation.apply(peptide_coord[3:]) return chain diff --git a/doc/tutorial/src/structure.py b/doc/tutorial/src/structure.py index ed29c243b..39cef7472 100644 --- a/doc/tutorial/src/structure.py +++ b/doc/tutorial/src/structure.py @@ -3,7 +3,7 @@ =================================== .. currentmodule:: biotite.structure - + :mod:`biotite.structure` is a *Biotite* subpackage for handling molecular structures. This subpackage enables efficient and easy handling of protein structure @@ -22,13 +22,13 @@ differs in the atom coordinates. Both, :class:`AtomArray` and :class:`AtomArrayStack`, store the attributes in `NumPy` arrays. This approach has multiple advantages: - + - Convenient selection of atoms in a structure by using *NumPy* style indexing - Fast calculations on structures using C-accelerated :class:`ndarray` operations - Simple implementation of custom calculations - + Based on the implementation using :class:`ndarray` objects, this package also contains functions for structure analysis and manipulation. @@ -68,7 +68,7 @@ # In most cases you won't work with :class:`Atom` instances and in even # fewer cases :class:`Atom` instances are created as it is done in the # above example. -# +# # If you want to work with an entire molecular structure, containing an # arbitrary amount of atoms, you have to use so called atom arrays. # An atom array can be seen as an array of atom instances @@ -124,7 +124,7 @@ # The latter example is incorrect, as it creates a subarray of the # initial :class:`AtomArray` (discussed later) and then tries to # replace the annotation array with the new value. -# +# # If you want to add further annotation categories to an array, you have # to call the :func:`add_annotation()` or :func:`set_annotation()` # method at first. After that you can access the new annotation array @@ -159,7 +159,7 @@ ######################################################################## # Loading structures from file # ---------------------------- -# +# # Usually structures are not built from scratch, but they are read from # a file. # Probably the most popular structure file format is the *PDB* format. @@ -169,7 +169,7 @@ # elucidated via NMR. # Thus, the corresponding PDB file consists of multiple (namely 38) # models, each showing another conformation. -# +# # .. currentmodule:: biotite.structure.io.pdb # # At first we load the structure from a PDB file via the class @@ -211,9 +211,9 @@ # restrictions. # Furthermore, much more additional information is stored in these # files. -# +# # .. currentmodule:: biotite.structure.io.pdbx -# +# # In contrast to PDB files, *Biotite* can read the entire content of # PDBx/mmCIF files, which can be accessed in a dictionary like manner. # At first, we read the file similarily to before, but this time we @@ -265,7 +265,7 @@ # single model. # If you would like to have an :class:`AtomArray` instead, you have to # specifiy the :obj:`model` parameter. -# +# # .. currentmodule:: biotite.structure.io.mmtf # # If you want to parse a large batch of structure files or you have to @@ -335,7 +335,7 @@ ######################################################################## # .. currentmodule:: biotite.structure.io.npz -# +# # An alternative file format for storing and loading atom arrays and # stacks even faster, is the *NPZ* format. # The big disadvantage is that the format is *Biotite*-exclusive: @@ -357,9 +357,9 @@ ######################################################################## # There are also some other supported file formats. # For a full list, have a look at :mod:`biotite.structure.io`. -# +# # .. currentmodule:: biotite.structure.io -# +# # Since programmers are usually lazy and do not want to write more code # than necessary, there are two convenient function for loading and # saving atom arrays or stacks, unifying the forementioned file formats: @@ -383,7 +383,7 @@ ######################################################################## # Reading trajectory files # ^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # If the package *MDtraj* is installed, *Biotite* provides a read/write # interface for different trajectory file formats. # All supported trajectory formats have in common, that they store @@ -438,7 +438,7 @@ ######################################################################## # Array indexing and filtering # ---------------------------- -# +# # .. currentmodule:: biotite.structure # # Atom arrays and stacks can be indexed in a similar way a @@ -533,7 +533,7 @@ ######################################################################## # If you would like to know which atoms are in proximity to specific # coordinates, have a look at the :class:`CellList` class. -# +# # .. warning:: For annotation editing Since :class:`AnnotatedSequence` objects use base position # indices and :class:`Sequence` objects use array position indices, # you will get different results for ``annot_seq[n:m].sequence`` and @@ -541,12 +541,12 @@ # # Representing bonds # ------------------ -# +# # Up to now we only looked into atom arrays whose atoms are merely # described by its coordinates and annotations. # But there is more: Chemical bonds can be described, too, using a # :class:`BondList`! -# +# # Consider the following case: Your atom array contains four atoms: # *N*, *CA*, *C* and *CB*. *CA* is a central atom that is connected to # *N*, *C* and *CB*. @@ -555,7 +555,7 @@ # in a corresponding atom array. # The pairs indicate which atoms share a bond. # Additionally, it is required to specify the number of atoms in the -# atom array. +# atom array. from tempfile import gettempdir import biotite.structure as struc @@ -614,17 +614,17 @@ ######################################################################## # As you see, the the bonds involving the *C* (only a single one) are # removed and the remaining indices are shifted. -# +# # Connecting atoms and bonds # ^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # We do not have to index the atom array and the bond list # separately. # For convenience reasons you can associate a :class:`BondList` to an # :class:`AtomArray` via the ``bonds`` attribute. # If no :class:`BondList` is associated, ``bonds`` is ``None``. # Every time the atom array is indexed, the index is also applied to the -# associated bond list. +# associated bond list. # The same behavior applies to concatenations, by the way. array.bonds = bond_list @@ -634,7 +634,7 @@ ######################################################################## # Note, that some functionalities in *Biotite* even require that the -# input structure has an associated :class:`BondList`. +# input structure has an associated :class:`BondList`. # # Reading and writing bonds # ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -720,7 +720,7 @@ ######################################################################## # An atom array can have an associated box, which is used in functions, # that consider periodic boundary conditions. -# Atom array stacks require a *(m,3,3)*-shaped :class:`ndarray`, +# Atom array stacks require a *(m,3,3)*-shaped :class:`ndarray`, # that contains the box vectors for each model. # The box is accessed via the `box` attribute, which is ``None`` by # default. @@ -785,10 +785,10 @@ ######################################################################## # For a complete list of transformation functions have a look in the # :doc:`API reference `. -# +# # Structure analysis # ------------------ -# +# # This package would be almost useless, if there wasn't some means to # analyze your structures. # Therefore, *Biotite* offers a bunch of functions for this purpose, @@ -797,15 +797,15 @@ # secondary structure. # The following section will introduce you to some of these functions, # which should be applied to that good old structure of *TC5b*. -# +# # The examples shown in this section are only a small glimpse into the # *structure* analysis toolset. # Have a look into the :doc:`API reference ` # for more information. -# +# # Geometry measures # ^^^^^^^^^^^^^^^^^ -# +# # Let's start with measuring some simple geometric characteristics, # for example atom distances of CA atoms. @@ -845,7 +845,7 @@ # Like some other functions in :mod:`biotite.structure`, we are able to # pick any combination of an atom, atom array or stack. Alternatively # :class:`ndarray` objects containing the coordinates can be provided. -# +# # Furthermore, we can measure bond angles and dihedral angles. # Calculate angle between first 3 CA atoms in first frame @@ -863,7 +863,7 @@ # distance/angle should be calculated. # Both variants can be setup to consider periodic boundary conditions # by setting the `box` or `periodic` parameter, respectively. -# +# # In some cases one is interested in the dihedral angles of the peptide # backbone, :math:`\phi`, :math:`\psi` and :math:`\omega`. # In the following code snippet we measure these angles and create a @@ -884,7 +884,7 @@ ######################################################################## # Comparing structures # ^^^^^^^^^^^^^^^^^^^^ -# +# # Now we want to calculate a measure of flexibility for each residue in # *TC5b*. The *root mean square fluctuation* (RMSF) is a good value for # that. @@ -900,7 +900,7 @@ # We consider only CA atoms stack = stack[:, stack.atom_name == "CA"] # Superimposing all models of the structure onto the first model -stack, transformation_tuple = struc.superimpose(stack[0], stack) +stack, transformation = struc.superimpose(stack[0], stack) print("RMSD for each model to first model:") print(struc.rmsd(stack[0], stack)) # Calculate the RMSF relative to the average of all models @@ -914,10 +914,10 @@ ######################################################################## # As you can see, both terminal residues are most flexible. -# +# # Calculating accessible surface area # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # Another interesting value for a protein structure is the # *solvent accessible surface area* (SASA) that indicates whether an # atom or residue is on the protein surface or buried inside the @@ -926,7 +926,7 @@ # atom. # Then we sum up the values for each residue, to get the # residue-wise SASA. -# +# # Besides other parameters, you can choose between different # Van-der-Waals radii sets: # *Prot0r*, the default set, is a set that defines radii for @@ -953,7 +953,7 @@ ######################################################################## # Secondary structure determination # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -# +# # *Biotite* can also be used to assign # *secondary structure elements* (SSE) to a structure with the # :func:`annotate_sse()` function. diff --git a/src/biotite/structure/basepairs.py b/src/biotite/structure/basepairs.py index 272d3dddc..371477cd0 100644 --- a/src/biotite/structure/basepairs.py +++ b/src/biotite/structure/basepairs.py @@ -15,7 +15,7 @@ import warnings from enum import IntEnum from .atoms import Atom, array -from .superimpose import superimpose, superimpose_apply +from .superimpose import superimpose from .filter import filter_nucleotides from .celllist import CellList from .hbond import hbond @@ -386,7 +386,7 @@ def base_pairs_edge(atom_array, base_pairs): References ---------- - + .. footbibliography:: """ # Result-``ndarray`` matches the dimensions of the input array @@ -537,7 +537,7 @@ def base_pairs_glycosidic_bond(atom_array, base_pairs): References ---------- - + .. footbibliography:: """ results = np.zeros(len(base_pairs), dtype='uint8') @@ -679,7 +679,7 @@ def base_stacking(atom_array, min_atoms_per_base=3): References ---------- - + .. footbibliography:: """ # Get the stacking candidates according to a cutoff distance, where @@ -846,7 +846,7 @@ def base_pairs(atom_array, min_atoms_per_base = 3, unique = True): References ---------- - + .. footbibliography:: """ @@ -1190,12 +1190,7 @@ def _match_base(nucleotide, min_atoms_per_base): # Match the selected std_base to the base. _, transformation = superimpose(nucleotide_matched, std_base_matched) - - # Transform the vectors - trans1, rot, trans2 = transformation - vectors += trans1 - vectors = np.dot(rot, vectors.T).T - vectors += trans2 + vectors = transformation.apply(vectors) # Normalize the base-normal-vector vectors[1,:] = vectors[1,:]-vectors[0,:] norm_vector(vectors[1,:]) @@ -1255,7 +1250,7 @@ def map_nucleotide(residue, min_atoms_per_base=3, rmsd_cutoff=0.28): References ---------- - + .. footbibliography:: """ # Check if the residue is a 'standard' nucleotide diff --git a/src/biotite/structure/superimpose.py b/src/biotite/structure/superimpose.py index a36f17fe9..acdb1ee8e 100755 --- a/src/biotite/structure/superimpose.py +++ b/src/biotite/structure/superimpose.py @@ -8,24 +8,86 @@ __name__ = "biotite.structure" __author__ = "Patrick Kunzmann, Claude J. Rogers" -__all__ = ["superimpose", "superimpose_apply"] +__all__ = ["superimpose", "superimpose_apply", "AffineTransformation"] + import numpy as np from .atoms import coord from .geometry import centroid +class AffineTransformation: + """ + An affine transformation, consisting of translations and a rotation. + + Parameters + ---------- + center_translation : ndarray, shape=(3,) or shape=(m,3), dtype=float + The translation vector for moving the centroid into the + origin. + rotation : ndarray, shape=(3,3) or shape=(m,3,3), dtype=float + The rotation matrix. + target_translation : ndarray, shape=(m,3), dtype=float + The translation vector for moving the structure onto the + fixed one. + + Attributes + ---------- + center_translation, rotation, target_translation : ndarray + Same as the parameters. + """ + def __init__(self, center_translation, rotation, target_translation): + self.center_translation = center_translation + self.rotation = rotation + self.target_translation = target_translation + + + def apply(self, atoms): + """ + Apply this transformation on the given structure. + + Parameters + ---------- + atoms : AtomArray or AtomArrayStack or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float + The structure to apply the transformation on. + + Returns + ------- + transformed : AtomArray or AtomArrayStack or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float + A copy of the `atoms` structure, + with transformations applied. + Only coordinates are returned, if coordinates were given in + `atoms`. + """ + mobile_coord = coord(atoms) + original_shape = mobile_coord.shape + mobile_coord = _reshape_to_3d(mobile_coord) + + superimposed_coord = mobile_coord.copy() + superimposed_coord += self.center_translation[:, np.newaxis, :] + superimposed_coord = _multi_matmul(self.rotation, superimposed_coord) + superimposed_coord += self.target_translation[:, np.newaxis, :] + + superimposed_coord = superimposed_coord.reshape(original_shape) + if isinstance(atoms, np.ndarray): + return superimposed_coord + else: + superimposed = atoms.copy() + superimposed.coord = superimposed_coord + return superimposed + + def superimpose(fixed, mobile, atom_mask=None): """ Superimpose structures onto a fixed structure. - + The superimposition is performed using the Kabsch algorithm :footcite:`Kabsch1976, Kabsch1978`, so that the RMSD between the superimposed and the fixed structure is minimized. - + Parameters ---------- - fixed : AtomArray, shape(n,) or ndarray, shape(n,), dtype=float + fixed : AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float The fixed structure. Alternatively coordinates can be given. mobile: AtomArray, shape(n,) or AtomArrayStack, shape(m,n) or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float @@ -41,7 +103,7 @@ def superimpose(fixed, mobile, atom_mask=None): on the covered atoms instead of all atoms. The returned superimposed structure will contain all atoms of the input structure, regardless of this parameter. - + Returns ------- fitted : AtomArray or AtomArrayStack or ndarray, shape(n,), dtype=float or ndarray, shape(m,n), dtype=float @@ -49,54 +111,43 @@ def superimpose(fixed, mobile, atom_mask=None): superimposed on the fixed structure. Only coordinates are returned, if coordinates were given in `mobile`. - transformation : tuple or tuple list - The tuple contains the transformations that were applied on - `mobile`. This can be used in `apply_superimposition()` in order - to transform another AtomArray in the same way. - The first element contains the translation vector for moving the - centroid into the origin. - The second element contains the rotation matrix. - The third element contains the translation vector for moving the - structure onto the fixed. - The three transformations are performed sequentially. - - See Also - -------- - superimpose_apply - + transformation : AffineTransformation + This object contains the affine transformation(s) that were + applied on `mobile`. + :meth:`AffineTransformation.apply()` can be used to transform + another AtomArray in the same way. + Notes ----- - The `transformation` tuple can be used in - :func:`superimpose_apply()` in order to transform another - :class:`AtomArray` in the same way. - This can come in handy, in case you want to superimpose two + The `transformation` can come in handy, in case you want to + superimpose two structures with different amount of atoms. Often the two structures need to be filtered in order to obtain the same size and annotation arrays. After superimposition the transformation can be applied on the - original structure using :func:`superimpose_apply()`. - + original structure using :meth:`AffineTransformation.apply()`. + References ---------- - + .. footbibliography:: - + Examples -------- - + At first two models of a structure are taken and one of them is randomly rotated/translated. Consequently the RMSD is quite large: - + >>> array1 = atom_array_stack[0] >>> array2 = atom_array_stack[1] >>> array2 = translate(array2, [1,2,3]) >>> array2 = rotate(array2, [1,2,3]) >>> print("{:.3f}".format(rmsd(array1, array2))) 11.260 - + RMSD decreases after superimposition of only CA atoms: - + >>> array2_fit, transformation = superimpose( ... array1, array2, atom_mask=(array2.atom_name == "CA") ... ) @@ -110,95 +161,48 @@ def superimpose(fixed, mobile, atom_mask=None): >>> print("{:.3f}".format(rmsd(array1, array2_fit))) 1.928 """ - - m_coord = coord(mobile) - f_coord = coord(fixed) - mshape = m_coord.shape - mdim = m_coord.ndim - if f_coord.ndim != 2: - raise ValueError("Expected fixed array to be an AtomArray") - if mdim < 2: - raise ValueError( - "Expected mobile array to be an AtomArray or AtomArrayStack" - ) - if mdim == 2: - # normalize inputs. Fixed coords has shape (n, 3) - # and mobile has shape (m, n, 3) - m_coord = m_coord[np.newaxis, ...] - - nmodels = m_coord.shape[0] - if f_coord.shape[0] != m_coord.shape[1]: - raise ValueError( - f"Expected fixed array and mobile array to have the same number " - f"of atoms, but {f_coord.shape[0]} != {m_coord.shape[1]}" - ) + # Bring coordinates into the same dimensionality + mob_coord = _reshape_to_3d(coord(mobile)) + fix_coord = _reshape_to_3d(coord(fixed)) if atom_mask is not None: # Implicitly this creates array copies - mob_filtered = m_coord[..., atom_mask, :] - fix_filtered = f_coord[atom_mask, :] + mob_filtered = mob_coord[:, atom_mask, :] + fix_filtered = fix_coord[:, atom_mask, :] else: - mob_filtered = np.copy(m_coord) - fix_filtered = np.copy(f_coord) - + mob_filtered = np.copy(mob_coord) + fix_filtered = np.copy(fix_coord) + # Center coordinates at (0,0,0) mob_centroid = centroid(mob_filtered) fix_centroid = centroid(fix_filtered) - mob_filtered -= mob_centroid[..., np.newaxis, :] - fix_filtered -= fix_centroid - - s_coord = m_coord.copy() - mob_centroid[..., np.newaxis, :] - # Perform Kabsch algorithm for every model - transformations = [None] * nmodels - for i in range(nmodels): - rotation = _superimpose(fix_filtered, mob_filtered[i]) - s_coord[i] = np.dot(rotation, s_coord[i].T).T - transformations[i] = (-mob_centroid[i], rotation, fix_centroid) - s_coord += fix_centroid - - if isinstance(mobile, np.ndarray): - superimposed = s_coord.reshape(mshape) - else: - superimposed = mobile.copy() - superimposed.coord = s_coord.reshape(mshape) + mob_centered_filtered = mob_filtered - mob_centroid[:, np.newaxis, :] + fix_centered_filtered = fix_filtered - fix_centroid[:, np.newaxis, :] - if mdim == 2: - return superimposed, transformations[0] - else: - return superimposed, transformations + rotation = _get_rotation_matrices( + fix_centered_filtered, mob_centered_filtered + ) + transform = AffineTransformation(-mob_centroid, rotation, fix_centroid) + return transform.apply(mobile), transform -def _superimpose(fix_centered, mob_centered): - """ - Perform the Kabsch algorithm using only the coordinates. +def superimpose_apply(atoms, transformation): """ - # Calculating rotation matrix - y = mob_centered - x = fix_centered - # Calculate covariance matrix - cov = np.dot(x.T, y) - v, s, w = np.linalg.svd(cov) - # Remove possibility of reflected atom coordinates - if np.linalg.det(v) * np.linalg.det(w) < 0: - v[:,-1] *= -1 - rotation = np.dot(v,w) - return rotation + Superimpose structures using a given :class:`AffineTransformation`. + The :class:`AffineTransformation` can be obtained by prior + superimposition. + + DEPRECATED: Use :func:`AffineTransformation.apply()` instead. -def superimpose_apply(atoms, transformation): - """ - Superimpose structures using a given transformation tuple. - - The transformation tuple is obtained by prior superimposition. - Parameters ---------- atoms : AtomArray or ndarray, shape(n,), dtype=float The structure to apply the transformation on. Alternatively coordinates can be given. - transformation: tuple, size=3 - The transformation tuple, obtained by :func:`superimpose()`. - + transformation: AffineTransformation + The transformation, obtained by :func:`superimpose()`. + Returns ------- fitted : AtomArray or AtomArrayStack @@ -206,20 +210,59 @@ def superimpose_apply(atoms, transformation): with transformations applied. Only coordinates are returned, if coordinates were given in `atoms`. - + See Also -------- superimpose """ - trans1, rot, trans2 = transformation - s_coord = coord(atoms).copy() - s_coord += trans1 - s_coord = np.dot(rot, s_coord.T).T - s_coord += trans2 - - if isinstance(atoms, np.ndarray): - return s_coord + return transformation.apply(atoms) + + +def _reshape_to_3d(coord): + """ + Reshape the coordinate array to 3D, if it is 2D. + """ + if coord.ndim < 2: + raise ValueError( + "Coordinates must be at least two-dimensional" + ) + if coord.ndim == 2: + return coord[np.newaxis, ...] + elif coord.ndim == 3: + return coord else: - transformed = atoms.copy() - transformed.coord = s_coord - return transformed \ No newline at end of file + raise ValueError( + "Coordinates must be at most three-dimensional" + ) + + +def _get_rotation_matrices(fixed, mobile): + """ + Get the rotation matrices to superimpose the given mobile + coordinates into the given fixed coordinates, minimizing the RMSD. + + Uses the *Kabsch* algorithm. + Both sets of coordinates must already be centered at origin. + """ + # Calculate cross-covariance matrices + cov = np.sum(fixed[:,:,:,np.newaxis] * mobile[:,:,np.newaxis,:], axis=1) + v, s, w = np.linalg.svd(cov) + # Remove possibility of reflected atom coordinates + reflected_mask = (np.linalg.det(v) * np.linalg.det(w) < 0) + v[reflected_mask, :, -1] *= -1 + matrices = np.matmul(v, w) + return matrices + + +def _multi_matmul(matrices, vectors): + """ + Calculate the matrix multiplication of m matrices + with m x n vectors. + """ + return np.transpose( + np.matmul( + matrices, + np.transpose(vectors, axes=(0, 2, 1)) + ), + axes=(0, 2, 1) + ) diff --git a/tests/database/test_rcsb.py b/tests/database/test_rcsb.py index 2bf19db98..ddd3919d5 100644 --- a/tests/database/test_rcsb.py +++ b/tests/database/test_rcsb.py @@ -86,9 +86,9 @@ def test_search_basic(): "rcsb_entity_source_organism.rcsb_gene_name.value", False, {"exact_match": "lacA"}, - ["5JUV", "1KQA", "1KRV", "1KRU", "1KRR", "1TG7", "1XC6", "3U7V", - "4IUG", "4LFK", "4LFL", "4LFM", "4LFN", "5IFP", "5IFT", "5IHR", - "4DUW", "5MGD", "5MGC"] + ["5JUV", "1KQA", "1KRV", "1KRU", "1KRR", "3U7V", "4IUG", "4LFK", + "4LFL", "4LFM", "4LFN", "5IFP", "5IFT", "5IHR", "4DUW", "5MGD", + "5MGC"] ), ( "struct.title", diff --git a/tests/structure/test_superimpose.py b/tests/structure/test_superimpose.py index 7f81aa372..86f8a07aa 100755 --- a/tests/structure/test_superimpose.py +++ b/tests/structure/test_superimpose.py @@ -6,14 +6,69 @@ import itertools from os.path import join import numpy as np +from numpy import sin, cos import pytest import biotite.structure as struc import biotite.structure.io as strucio -import biotite.database.rcsb as rcsb import biotite.structure as struc from ..util import data_dir +@pytest.mark.parametrize( + "seed, multi_model", itertools.product( + range(10), + [False, True] + ) +) +def test_restoration(seed, multi_model): + """ + Check if randomly relocated coordinates can be restored to their + original position by superimposition. + """ + N_MODELS = 10 + N_COORD = 100 + + np.random.seed(seed) + if multi_model: + ref_coord = np.random.rand(N_MODELS, N_COORD, 3) + else: + ref_coord = np.random.rand(N_COORD, 3) + ref_coord = _transform_random_affine(ref_coord) + + # Try to restore original coordinates after random relocation + test_coord = _transform_random_affine(ref_coord) + test_coord, _ = struc.superimpose(ref_coord, test_coord) + + assert test_coord.flatten().tolist() \ + == pytest.approx(ref_coord.flatten().tolist(), abs=1e-6) + + +def test_rotation_matrix(): + """ + Create randomly generated coordinates *a* and rotate them via a + rotation matrix to obtain new coordinates *b*. + ``superimpose(b, a)`` should give the rotation matrix. + """ + N_COORD = 100 + + # A rotation matrix that rotates 90 degrees around the z-axis + ref_rotation = np.array([ + [0, -1, 0], + [1, 0, 0], + [0, 0, 1] + ]) + + np.random.seed(0) + original_coord = np.random.rand(N_COORD, 3) + # Rotate about 90 degrees around z-axis + rotated_coord = struc.rotate(original_coord, angles=(0, 0, np.pi/2)) + _, transform = struc.superimpose(rotated_coord, original_coord) + test_rotation = transform.rotation + + assert test_rotation.flatten().tolist() \ + == pytest.approx(ref_rotation.flatten().tolist(), abs=1e-6) + + @pytest.mark.parametrize( "path, coord_only", itertools.product( glob.glob(join(data_dir("structure"), "*.mmtf")), @@ -28,11 +83,11 @@ def test_superimposition_array(path, coord_only): almost perfect match. """ fixed = strucio.load_structure(path, model=1) - + mobile = fixed.copy() mobile = struc.rotate(mobile, (1,2,3)) mobile = struc.translate(mobile, (1,2,3)) - + if coord_only: fixed = fixed.coord mobile = mobile.coord @@ -40,13 +95,13 @@ def test_superimposition_array(path, coord_only): fitted, transformation = struc.superimpose( fixed, mobile ) - + if coord_only: assert isinstance(fitted, np.ndarray) assert struc.rmsd(fixed, fitted) == pytest.approx(0, abs=6e-4) - + fitted = struc.superimpose_apply(mobile, transformation) - + if coord_only: assert isinstance(fitted, np.ndarray) assert struc.rmsd(fixed, fitted) == pytest.approx(0, abs=6e-4) @@ -67,9 +122,9 @@ def test_superimposition_stack(ca_only): mask = (mobile.atom_name == "CA") else: mask = None - + fitted, _ = struc.superimpose(fixed, mobile, mask) - + if ca_only: # The superimpositions are better for most cases than the # superimpositions in the structure file @@ -100,7 +155,7 @@ def test_masked_superimposition(seed): np.random.seed(seed) mask = np.full(fixed.array_length(), False) mask[np.random.randint(fixed.array_length())] = True - + # The distance between the atom in both models should not be # already 0 prior to superimposition assert struc.distance(fixed[mask], mobile[mask])[0] \ @@ -109,12 +164,12 @@ def test_masked_superimposition(seed): fitted, transformation = struc.superimpose( fixed, mobile, mask ) - + assert struc.distance(fixed[mask], fitted[mask])[0] \ == pytest.approx(0, abs=5e-4) - + fitted = struc.superimpose_apply(mobile, transformation) - + struc.distance(fixed[mask], fitted[mask])[0] \ == pytest.approx(0, abs=5e-4) @@ -131,15 +186,21 @@ def test_input_shapes(single_model, single_atom): path = join(data_dir("structure"), "1l2y.mmtf") stack = strucio.load_structure(path) fixed = stack[0] - + mobile = stack if single_model: mobile = mobile[:1, :] if single_atom: mobile = mobile[:, :1] fixed = fixed[:1] - + fitted, _ = struc.superimpose(fixed, mobile) assert type(fitted) == type(mobile) assert fitted.coord.shape == mobile.coord.shape + + +def _transform_random_affine(coord): + coord = struc.translate(coord, np.random.rand(3)) + coord = struc.rotate(coord, np.random.uniform(low=0, high=2*np.pi, size=3)) + return coord \ No newline at end of file