Skip to content

Commit

Permalink
Fix duplicate bonds in chem_comp_bond
Browse files Browse the repository at this point in the history
  • Loading branch information
padix-key committed Sep 1, 2024
1 parent ef5447c commit 1bc807a
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 10 deletions.
32 changes: 23 additions & 9 deletions src/biotite/structure/io/pdbx/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,25 +964,38 @@ def _set_intra_residue_bonds(array, atom_site):
aromatic_flag[i] = aromatic
any_mask = bond_array[:, 2] == BondType.ANY

chem_comp_bond = Category()
# Remove already existing residue and atom name combinations
# These appear when the structure contains a residue multiple times
atom_id_1 = array.atom_name[bond_array[:, 0]]
atom_id_2 = array.atom_name[bond_array[:, 1]]
# Take the residue name from the first atom index, as the residue
# name is the same for both atoms, since we have only intra bonds
chem_comp_bond["comp_id"] = array.res_name[bond_array[:, 0]]
chem_comp_bond["atom_id_1"] = array.atom_name[bond_array[:, 0]]
chem_comp_bond["atom_id_2"] = array.atom_name[bond_array[:, 1]]
comp_id = array.res_name[bond_array[:, 0]]
_, unique_indices = np.unique(
np.stack([comp_id, atom_id_1, atom_id_2], axis=-1), axis=0, return_index=True
)
unique_indices.sort()

chem_comp_bond = Category()
n_bonds = len(unique_indices)
chem_comp_bond["pdbx_ordinal"] = np.arange(1, n_bonds + 1, dtype=np.int32)
chem_comp_bond["comp_id"] = comp_id[unique_indices]
chem_comp_bond["atom_id_1"] = atom_id_1[unique_indices]
chem_comp_bond["atom_id_2"] = atom_id_2[unique_indices]
chem_comp_bond["value_order"] = Column(
value_order, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
value_order[unique_indices],
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
)
chem_comp_bond["pdbx_aromatic_flag"] = Column(
aromatic_flag, np.where(any_mask, MaskValue.MISSING, MaskValue.PRESENT)
aromatic_flag[unique_indices],
np.where(any_mask[unique_indices], MaskValue.MISSING, MaskValue.PRESENT),
)
# BondList does not contain stereo information
# -> all values are missing
chem_comp_bond["pdbx_stereo_config"] = Column(
np.zeros(len(bond_array), dtype="U1"),
np.full(len(bond_array), MaskValue.MISSING),
np.zeros(n_bonds, dtype="U1"),
np.full(n_bonds, MaskValue.MISSING),
)
chem_comp_bond["pdbx_ordinal"] = np.arange(1, len(bond_array) + 1, dtype=np.int32)
return chem_comp_bond


Expand All @@ -1007,6 +1020,7 @@ def _set_inter_residue_bonds(array, atom_site):
bond_array = _filter_bonds(array, "inter")
if len(bond_array) == 0:
return None

struct_conn = Category()
struct_conn["id"] = np.arange(1, len(bond_array) + 1)
struct_conn["conn_type_id"] = np.full(len(bond_array), "covale")
Expand Down
26 changes: 25 additions & 1 deletion tests/structure/test_pdbx.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def test_bond_conversion(tmpdir, format, path):
ref_bonds = atoms.bonds

pdbx_file = File()
# The import difference to `test_conversion()` is `include_bonds`
# The important difference to `test_conversion()` is `include_bonds`
pdbx.set_structure(pdbx_file, atoms, include_bonds=True)
file_path = join(tmpdir, f"test.{format}")
pdbx_file.write(file_path)
Expand All @@ -182,6 +182,30 @@ def test_bond_conversion(tmpdir, format, path):
assert test_bonds == ref_bonds


def test_bond_sparsity():
"""
Ensure that only as much intra-residue bonds are written as necessary,
i.e. the created ``chem_comp_bond`` category has at maximum category many rows as
the reference NextGen CIF file.
Less bonds are allowed, as not all atoms that a residue has in the CCD are actually
present in the structure.
This tests a previous bug, where duplicate intra-residue bonds were written
(https://github.com/biotite-dev/biotite/issues/652).
"""
path = join(data_dir("structure"), "nextgen", "pdb_00001l2y_xyz-enrich.cif")
ref_pdbx_file = pdbx.CIFFile.read(path)
ref_bond_number = ref_pdbx_file.block["chem_comp_bond"].row_count

atoms = pdbx.get_structure(ref_pdbx_file, model=1, include_bonds=True)
test_pdbx_file = pdbx.CIFFile()
pdbx.set_structure(test_pdbx_file, atoms, include_bonds=True)
test_bond_number = test_pdbx_file.block["chem_comp_bond"].row_count

assert test_bond_number <= ref_bond_number


@pytest.mark.parametrize("format", ["cif", "bcif"])
def test_extra_fields(tmpdir, format):
path = join(data_dir("structure"), f"1l2y.{format}")
Expand Down

0 comments on commit 1bc807a

Please sign in to comment.