Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Infer missing elements for consistency with Biotite #19

Merged
merged 1 commit into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions python-src/fastpdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,18 @@ def get_structure(self, model=None, altloc="first", extra_fields=None, include_b
element = np.frombuffer(element, dtype="U2")
altloc_id = np.frombuffer(altloc_id, dtype="U1")

# Replace empty strings for elements with guessed types
# This is used e.g. for PDB files created by Gromacs
empty_element_mask = element == ""
if empty_element_mask.any():
warnings.warn(
f"{np.count_nonzero(empty_element_mask)} elements "
"were guessed from atom name"
)
element[empty_element_mask] = struc.infer_elements(
atom_name[empty_element_mask]
)

if coord.ndim == 3:
atoms = struc.AtomArrayStack(coord.shape[0], coord.shape[1])
atoms.coord = coord
Expand Down
66 changes: 41 additions & 25 deletions tests/test_fastpdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,22 @@
"""

import itertools
import glob
from io import StringIO
from os.path import join, dirname, realpath
from pathlib import Path
import pytest
import biotite
import numpy as np
import biotite.structure.io.pdb as pdb
import fastpdb as fastpdb

# Directory holding the PDB files used by the tests, located relative to
# this module so the suite does not depend on the working directory.
DATA_PATH = Path(__file__).parent / "data"
# All PDB files in the data directory. The result is sorted because
# `Path.glob()` does not guarantee an ordering, and anything iterating
# over this list should see the files in a stable order.
TEST_STRUCTURES = sorted(DATA_PATH.glob("*.pdb"))


def test_get_remark():
    """
    Check that `fastpdb.PDBFile.get_remark()` returns the same result as
    the `biotite.structure.io.pdb` reference for every remark number
    in the range from 0 (inclusive) to 1000 (exclusive).
    """
    path = DATA_PATH / "1aki.pdb"
    # Each file is read exactly once; the reference parser goes first
    ref_file = pdb.PDBFile.read(path)
    test_file = fastpdb.PDBFile.read(path)

    for remark in np.arange(0, 1000):
        assert test_file.get_remark(remark) == ref_file.get_remark(remark)
Expand All @@ -35,9 +34,9 @@ def test_get_remark():
)
def test_get_model_count(path):
    """
    Check that `fastpdb.PDBFile` reports the same number of models as
    the `biotite.structure.io.pdb` reference for the file at `path`.
    """
    expected_count = pdb.PDBFile.read(path).get_model_count()
    actual_count = fastpdb.PDBFile.read(path).get_model_count()

    assert expected_count == actual_count

Expand All @@ -60,7 +59,7 @@ def test_get_coord(path, model):
return
else:
raise

test_file = fastpdb.PDBFile.read(path)
test_coord = test_file.get_coord(model)

Expand All @@ -83,8 +82,8 @@ def test_get_structure(path, model, altloc, extra_fields, include_bonds):
extra_fields = ["atom_id", "b_factor", "occupancy", "charge"]
else:
extra_fields = None


ref_file = pdb.PDBFile.read(path)
try:
ref_atoms = ref_file.get_structure(
Expand All @@ -98,28 +97,27 @@ def test_get_structure(path, model, altloc, extra_fields, include_bonds):
else:
raise


test_file = fastpdb.PDBFile.read(path)
test_atoms = test_file.get_structure(
model, altloc, extra_fields, include_bonds
)


if ref_atoms.box is not None:
assert np.allclose(test_atoms.box, ref_atoms.box)
else:
assert test_atoms.box is None

assert test_atoms.bonds == ref_atoms.bonds

for category in ref_atoms.get_annotation_categories():
if np.issubdtype(ref_atoms.get_annotation(category).dtype, float):
assert test_atoms.get_annotation(category).tolist() \
== pytest.approx(ref_atoms.get_annotation(category).tolist())
else:
assert test_atoms.get_annotation(category).tolist() \
== ref_atoms.get_annotation(category).tolist()

assert np.allclose(test_atoms.coord, ref_atoms.coord)


Expand All @@ -138,8 +136,7 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
extra_fields = ["atom_id", "b_factor", "occupancy", "charge"]
else:
extra_fields = None



input_file = pdb.PDBFile.read(path)
try:
atoms = input_file.get_structure(
Expand All @@ -153,7 +150,6 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
else:
raise


ref_file = pdb.PDBFile()
ref_file.set_structure(atoms)
ref_file_content = StringIO()
Expand All @@ -164,7 +160,6 @@ def test_set_structure(path, model, altloc, extra_fields, include_bonds):
test_file_content = StringIO()
test_file.write(test_file_content)


assert test_file_content.getvalue() == ref_file_content.getvalue()


Expand All @@ -174,8 +169,29 @@ def test_get_assembly():
as `get_assembly()` is not explicitly implemented in
`fastpdb.PDBFile`.
"""
ref_file = pdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))

test_file = fastpdb.PDBFile.read(join(DATA_PATH, "1aki.pdb"))
ref_file = pdb.PDBFile.read(DATA_PATH / "1aki.pdb")

test_file = fastpdb.PDBFile.read(DATA_PATH / "1aki.pdb")

assert test_file.get_assembly() == ref_file.get_assembly()


@pytest.mark.filterwarnings("ignore")
def test_inferred_elements(tmp_path):
    """
    Check that elements missing from a PDB file are filled in again
    when the file is read with `fastpdb.PDBFile`.

    The elements of a valid structure are blanked out, the structure is
    written to a temporary file and read back.
    The elements of the re-read structure must equal the original ones.
    Warnings are ignored for this test.
    """
    # Read valid pdb file
    pdb_file = fastpdb.PDBFile.read(DATA_PATH / "1l2y.pdb")
    atoms = pdb_file.get_structure()
    # Remove all elements (on a copy, the original is the reference)
    atoms_wo_elements = atoms.copy()
    atoms_wo_elements.element[:] = ""
    # Save stack without elements to file
    temp = tmp_path / "tmp.pdb"
    tmp_pdb_file = pdb.PDBFile()
    tmp_pdb_file.set_structure(atoms_wo_elements)
    tmp_pdb_file.write(temp)

    # Read new stack from file with guessed elements
    guessed_pdb_file = fastpdb.PDBFile.read(temp)
    atoms_guessed_elements = guessed_pdb_file.get_structure()

    assert atoms_guessed_elements.element.tolist() == atoms.element.tolist()
Loading