Skip to content

Commit

Permalink
print the hierarchy of a compound (#1097)
Browse files Browse the repository at this point in the history
* replaced bondgraph with networkx

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* misc fix for docs

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* use more particles in residue map testing to ensure actually faster

* reduced number of children and grandchildren in test_nested_compound

* commented out silica_interface test

* added in print hierarchy functionality

* changed duplicate checking to also look at names of child compoudns and names of the particles contained by the given compound

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* added more tests for print hierarchy

* commented out test silica interface test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* small edits to tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* added in test for missed lines

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update mbuild/compound.py

marking argument as optional

Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com>

* Update mbuild/compound.py

marking argument as optional

Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com>

* Update mbuild/compound.py

marking argument as optional

Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com>

* addressed PR comments

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* addressed minor docs formatting

* added in treelib to environment-docs.yml file

* tried to fix readthedocs issue unrelated to these changes

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: Christopher Iacovella <cri@MB22.local>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Co Quach <43968221+daico007@users.noreply.github.com>
Co-authored-by: Co Quach <daico007@gmail.com>
Co-authored-by: Christopher Iacovella <cri@MB22.hsd1.or.comcast.net>
  • Loading branch information
6 people authored Mar 14, 2023
1 parent 00d8c77 commit ab6d54d
Show file tree
Hide file tree
Showing 6 changed files with 286 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/environment-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dependencies:
- ele
- sphinx_rtd_theme
- widgetsnbextension
- treelib
- pip:
- sphinxcontrib-svg2pdfconverter[CairoSVG]
- cairosvg
1 change: 1 addition & 0 deletions environment-dev-win.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@ dependencies:
- python<3.10
- rdkit>=2021
- scipy
- treelib
1 change: 1 addition & 0 deletions environment-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ dependencies:
- python<3.10
- rdkit>=2021
- scipy
- treelib
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ dependencies:
- python<3.10
- rdkit>=2021
- scipy
- treelib
151 changes: 151 additions & 0 deletions mbuild/compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import numpy as np
from ele.element import Element, element_from_name, element_from_symbol
from ele.exceptions import ElementError
from treelib import Tree

from mbuild import conversion
from mbuild.bond_graph import BondGraph
Expand Down Expand Up @@ -278,6 +279,156 @@ def _contains_only_ports(self):
return False
return True

def print_hierarchy(self, print_full=False, index=None, show_tree=True):
"""Print the hierarchy of the Compound.
Parameters
----------
print_full: bool, optional, default=False
The full hierarchy will be printed, rather than condensing
compounds with identical topologies.
Topologies are considered identical if they have the same name,
contain the number and names of children,
contain the same number and names of particles,
and the same number of bonds.
index: int, optional, default=None
Print the branch of the first level of the hiearchy
corresponding to the value specified by index.
This only applies when print_full is True.
show_tree: bool, optional, default=True
If False, do not print the tree to the screen.
Returns
-------
tree, treelib.tree.Tree, hierarchy of the compound as a tree
"""
tree = Tree()

# loop through the hierarchy saving the data to an array hh
if print_full:
hh = [h for h in self._get_hierarchy()]
else:
hh = [h for h in self._get_hierarchy_nodup()]

# if our compound does not have any children we need to call n_direct_bonds instead of n_bonds
if len(self.children) == 0:
n_bonds = self.n_direct_bonds
else:
n_bonds = self.n_bonds

# add the top level compound to create the top level of the tree
# note that node identifiers passed as the second argument
# correspond to the compound id
tree.create_node(
f"{self.name}, {self.n_particles} particles, {n_bonds} bonds, {len(self.children)} children",
f"{id(self)}",
)

# if index is specified, ensure we are not selecting an index out of range
if not index is None:
if index >= len(self.children):
raise MBuildError(
f"Index {index} out of range. The number of first level nodes in the tree is {len(self.children)}."
)

count = -1

for h in hh:
if len(h["comp"].children) == 0:
n_bonds = h["comp"].n_direct_bonds
else:
n_bonds = h["comp"].n_bonds
if h["level"] == 0:
count = count + 1
if print_full:
if index is None:
tree.create_node(
f"[{h['comp'].name}]: {h['comp'].n_particles} particles, {n_bonds} bonds, {len(h['comp'].children)} children",
f"{h['comp_id']}",
f"{h['parent_id']}",
)
elif count == index:
tree.create_node(
f"[{h['comp'].name}]: {h['comp'].n_particles} particles, {n_bonds} bonds, {len(h['comp'].children)} children",
f"{h['comp_id']}",
f"{h['parent_id']}",
)
else:
tree.create_node(
f"[{h['comp'].name} x {h['n_dup']}], {h['comp'].n_particles} particles, {n_bonds} bonds, {len(h['comp'].children)} children",
f"{h['comp_id']}",
f"{h['parent_id']}",
)
if show_tree:
tree.show()
return tree

def _get_hierarchy(self, level=0):
"""Return an array of dictionaries corresponding to hierarchy of the compound, recursively."""
if not self.children:
return
for child in self.children:
yield {
"level": level,
"parent_id": id(self),
"comp_id": id(child),
"comp": child,
}
for subchild in child._get_hierarchy(level + 1):
yield subchild

def _get_hierarchy_nodup(self, level=0):
"""Return an array of dictionaries corresponding to hierarchy of the compound, recursively.
This routine will identify any duplicate compounds at a given level, including the number of
duplicates for each compound. Compounds are considered to be identical if the name,
number of children, and number of particles are the same at the same level.
"""
if not self.children:
return

duplicates = {}
for child in self.children:
part_string = "".join([part.name for part in child.particles()])
child_string = "".join([child.name for child in child.children])

if len(child.children) == 0:
n_bonds = child.n_direct_bonds
else:
n_bonds = child.n_bonds

identifier = f"{child.name}_{len(child.children)}_{child_string}_{child.n_particles}_{part_string}_{n_bonds}"

if not identifier in duplicates:
duplicates[identifier] = [1, True]
else:
duplicates[identifier][0] += 1

for child in self.children:
part_string = "".join([part.name for part in child.particles()])
child_string = "".join([child.name for child in child.children])

if len(child.children) == 0:
n_bonds = child.n_direct_bonds
else:
n_bonds = child.n_bonds

identifier = f"{child.name}_{len(child.children)}_{child_string}_{child.n_particles}_{part_string}_{n_bonds}"

if duplicates[identifier][1]:
yield {
"level": level,
"parent_id": id(self),
"comp_id": id(child),
"comp": child,
"n_dup": duplicates[identifier][0],
}

for subchild in child._get_hierarchy_nodup(level + 1):
yield subchild
duplicates[identifier][1] = False

def ancestors(self):
"""Generate all ancestors of the Compound recursively.
Expand Down
131 changes: 131 additions & 0 deletions mbuild/tests/test_compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,137 @@ def test_direct_bonds_cloning(self, ethane):
for p1, p2 in zip(ethane.particles(), ethane_clone.particles()):
assert p1.n_direct_bonds == p2.n_direct_bonds

def test_hierarchy(self, ethane):
# first check the hierarchy returned where we don't print duplicates
ethane_hierarchy = ethane._get_hierarchy_nodup()
eh = [t for t in ethane_hierarchy]
assert len(eh) == 3
assert eh[0]["level"] == 0
assert eh[0]["parent_id"] == id(ethane)
assert eh[0]["comp_id"] == id(ethane.children[0])
assert eh[0]["comp"] == ethane.children[0]

assert eh[0]["n_dup"] == 2
assert eh[1]["level"] == 1
assert eh[1]["parent_id"] == id(ethane.children[0])
assert eh[1]["comp_id"] == id(ethane.children[0].children[0])
assert eh[1]["comp"] == ethane.children[0].children[0]
assert eh[1]["n_dup"] == 1

assert eh[2]["level"] == 1
assert eh[2]["parent_id"] == id(ethane.children[0])
assert eh[2]["comp_id"] == id(ethane.children[0].children[1])
assert eh[2]["comp"] == ethane.children[0].children[1]
assert eh[2]["n_dup"] == 3

# now check the hierarchy returned with duplicates
ethane_hierarchy_full = ethane._get_hierarchy()
ehf = [t for t in ethane_hierarchy_full]
assert ehf[0]["level"] == 0
assert ehf[1]["level"] == 1
assert ehf[2]["level"] == 1
assert ehf[3]["level"] == 1
assert ehf[4]["level"] == 1
assert ehf[5]["level"] == 0
assert ehf[6]["level"] == 1
assert ehf[7]["level"] == 1
assert ehf[8]["level"] == 1
assert ehf[9]["level"] == 1

assert len(ehf) == 10
assert ehf[0]["parent_id"] == id(ethane)
assert ehf[0]["comp_id"] == id(ethane.children[0])
assert ehf[0]["comp"] == ethane.children[0]
assert ehf[1]["parent_id"] == id(ethane.children[0])
assert ehf[1]["comp_id"] == id(ethane.children[0].children[0])
assert ehf[1]["comp"] == ethane.children[0].children[0]
assert ehf[2]["parent_id"] == id(ethane.children[0])
assert ehf[2]["comp_id"] == id(ethane.children[0].children[1])
assert ehf[2]["comp"] == ethane.children[0].children[1]
assert ehf[3]["parent_id"] == id(ethane.children[0])
assert ehf[3]["comp_id"] == id(ethane.children[0].children[2])
assert ehf[3]["comp"] == ethane.children[0].children[2]
assert ehf[4]["parent_id"] == id(ethane.children[0])
assert ehf[4]["comp_id"] == id(ethane.children[0].children[3])
assert ehf[4]["comp"] == ethane.children[0].children[3]
assert ehf[5]["parent_id"] == id(ethane)
assert ehf[5]["comp_id"] == id(ethane.children[1])
assert ehf[5]["comp"] == ethane.children[1]
assert ehf[6]["parent_id"] == id(ethane.children[1])
assert ehf[6]["comp_id"] == id(ethane.children[1].children[0])
assert ehf[6]["comp"] == ethane.children[1].children[0]
assert ehf[7]["parent_id"] == id(ethane.children[1])
assert ehf[7]["comp_id"] == id(ethane.children[1].children[1])
assert ehf[7]["comp"] == ethane.children[1].children[1]
assert ehf[8]["parent_id"] == id(ethane.children[1])
assert ehf[8]["comp_id"] == id(ethane.children[1].children[2])
assert ehf[8]["comp"] == ethane.children[1].children[2]
assert ehf[9]["parent_id"] == id(ethane.children[1])
assert ehf[9]["comp_id"] == id(ethane.children[1].children[3])
assert ehf[9]["comp"] == ethane.children[1].children[3]

# examine the tree output from print_hierarchy
ethane_tree = ethane.print_hierarchy()
assert ethane_tree.depth() == 2
tree_json = ethane_tree.to_json(with_data=False)
assert (
tree_json
== '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3 x 2], 4 particles, 3 bonds, 4 children": {"children": ["[C x 1], 1 particles, 4 bonds, 0 children", "[H x 3], 1 particles, 1 bonds, 0 children"]}}]}}'
)

ethane_tree_full = ethane.print_hierarchy(print_full=True)
assert ethane_tree_full.depth() == 2
tree_json_full = ethane_tree_full.to_json(with_data=False)
assert (
tree_json_full
== '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}, {"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}]}}'
)

ethane_tree_full_index = ethane.print_hierarchy(
print_full=True, index=0
)
assert ethane_tree_full_index.depth() == 2
tree_json_full_index = ethane_tree_full_index.to_json(with_data=False)
assert (
tree_json_full_index
== '{"Ethane, 8 particles, 7 bonds, 2 children": {"children": [{"[CH3]: 4 particles, 3 bonds, 4 children": {"children": ["[C]: 1 particles, 4 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children", "[H]: 1 particles, 1 bonds, 0 children"]}}]}}'
)

system = mb.Compound()
system.add(mb.clone(ethane))
system.add(mb.clone(ethane))

system_hierarchy = system._get_hierarchy_nodup()

sh_array = [t for t in system_hierarchy]
assert len(sh_array) == 4

# let us change the name to ensure that it doens't see it as a duplicate
system.children[0].name = "Ethane_new_name"
system_hierarchy = system._get_hierarchy_nodup()

sh_array = [t for t in system_hierarchy]
assert len(sh_array) == 8

# make sure we throw an error if we try to index out of range
with pytest.raises(MBuildError):
system.print_hierarchy(print_full=True, index=10)
temp_particle = mb.Compound(name="C", element="C")
temp_tree = temp_particle.print_hierarchy()
assert temp_tree.depth() == 0

def test_show_hierarchy(self, capsys):
# test that the output written to the screen is correct
temp_particle = mb.Compound(name="C", element="C")
temp_particle.print_hierarchy()

captured = capsys.readouterr()
assert captured.out.strip() == "C, 1 particles, 0 bonds, 0 children"

temp_particle.print_hierarchy(show_tree=False)
captured = capsys.readouterr()
assert captured.out.strip() == ""

def test_load_protein(self):
# Testing the loading function with complicated protein,
# The protein file is taken from RCSB protein data bank
Expand Down

0 comments on commit ab6d54d

Please sign in to comment.