Skip to content

Commit

Permalink
Merge pull request #730 from davegrays/fix/allow-all-ccd-residues
Browse files Browse the repository at this point in the history
fix: allow ccd residues with missing coords
  • Loading branch information
padix-key authored Jan 8, 2025
2 parents 318db78 + 4e931fb commit 8138e0e
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 9 deletions.
13 changes: 11 additions & 2 deletions src/biotite/structure/info/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# fmt: on


def residue(res_name):
def residue(res_name, allow_missing_coord=False):
"""
Get an atom array, representing the residue with the given name.
Expand All @@ -30,6 +30,11 @@ def residue(res_name):
----------
res_name : str
The up to 3-letter name of the residue.
allow_missing_coord : bool, optional
Whether to allow missing coordinate values in the residue.
If ``True``, these will be represented as ``nan`` values.
If ``False``, a ``ValueError`` is raised when missing coordinates
are encountered.
Returns
-------
Expand Down Expand Up @@ -74,7 +79,11 @@ def residue(res_name):
from biotite.structure.io.pdbx import get_component

try:
component = get_component(get_ccd(), res_name=res_name)
component = get_component(
get_ccd(),
res_name=res_name,
allow_missing_coord=allow_missing_coord,
)
except KeyError:
raise KeyError(f"No atom information found for residue '{res_name}' in CCD")
component.hetero[:] = res_name not in NON_HETERO_RESIDUES
Expand Down
32 changes: 25 additions & 7 deletions src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,13 @@ def _filter_canonical_links(array, bond_array):
) # fmt: skip


def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=None):
def get_component(
pdbx_file,
data_block=None,
use_ideal_coord=True,
res_name=None,
allow_missing_coord=False,
):
"""
Create an :class:`AtomArray` for a chemical component from the
``chem_comp_atom`` and, if available, the ``chem_comp_bond``
Expand Down Expand Up @@ -1213,6 +1219,11 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
In this case, the component with the given residue name is
read.
By default, all rows would be read in this case.
allow_missing_coord : bool, optional
Whether to allow missing coordinate values in components.
If ``True``, these will be represented as ``nan`` values.
If ``False``, a ``ValueError`` is raised when missing coordinates
are encountered.
Returns
-------
Expand Down Expand Up @@ -1303,7 +1314,8 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
else:
raise
array.coord = _parse_component_coordinates(
[atom_category[field] for field in alt_coord_fields]
[atom_category[field] for field in alt_coord_fields],
allow_missing=allow_missing_coord,
)

try:
Expand Down Expand Up @@ -1334,14 +1346,20 @@ def get_component(pdbx_file, data_block=None, use_ideal_coord=True, res_name=Non
return array


def _parse_component_coordinates(coord_columns, allow_missing=False):
    """
    Assemble the coordinate array of a component from its coordinate
    columns.

    Parameters
    ----------
    coord_columns : list
        The columns holding the coordinate values, one column per
        spatial dimension.
        The *i*-th column fills the *i*-th column of the returned array.
        Each column must support ``len()``, have a ``mask`` attribute
        (either ``None`` or an object whose ``array`` flags masked
        values) and provide ``as_array()``.
    allow_missing : bool, optional
        Whether to allow missing (masked) coordinate values.
        If ``True``, a single ``UserWarning`` is issued and the missing
        values are represented as ``nan``.
        If ``False``, a ``ValueError`` is raised when missing coordinates
        are encountered.

    Returns
    -------
    coord : ndarray, shape=(n,3), dtype=float32
        The coordinates, where *n* is the length of the first column.

    Raises
    ------
    ValueError
        If any column contains masked values and `allow_missing` is
        ``False``.
    """
    has_missing = any(
        column.mask is not None and column.mask.array.any()
        for column in coord_columns
    )
    if has_missing:
        if not allow_missing:
            raise ValueError(
                "Missing coordinates for some atoms",
            )
        # Warn once per component instead of once per coordinate axis
        warnings.warn(
            "Missing coordinates for some atoms. Those will be set to nan",
            UserWarning,
        )

    coord = np.zeros((len(coord_columns[0]), 3), dtype=np.float32)
    for i, column in enumerate(coord_columns):
        # Masked entries (if any were allowed above) become nan
        coord[:, i] = column.as_array(np.float32, masked_value=np.nan)
    return coord


Expand Down
21 changes: 21 additions & 0 deletions tests/structure/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,24 @@ def test_set_ccd_path(fake_ccd_path):

# The new fake CCD has only a single compound
assert strucinfo.all_residues() == ["FOO"]


@pytest.mark.parametrize(
    "res_name, allow_missing_coord",
    [
        ("ALA", False),
        ("A1IQW", True),
        ("RRE", True),
    ],
)
def test_residue(res_name, allow_missing_coord):
    """
    Check that :func:`residue()` returns a non-empty atom array whose
    atoms all carry the requested residue name.

    - ``ALA``: standard amino acid, expected to load even with
      ``allow_missing_coord=False``.
    - ``A1IQW`` and ``RRE``: expected to load only with
      ``allow_missing_coord=True`` (as of Jan 6, 2025).
    """
    atoms = strucinfo.residue(res_name, allow_missing_coord=allow_missing_coord)

    assert isinstance(atoms, struc.AtomArray)
    assert atoms.array_length() > 0
    assert np.all(atoms.res_name == res_name)

0 comments on commit 8138e0e

Please sign in to comment.