-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree.py
221 lines (189 loc) · 6.87 KB
/
tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
"""
Implements a recursive tree data structure for internal use.
"""
# Copyright (C) 2020
# Benjamin Paaßen
# The University of Sydney
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
__author__ = 'Benjamin Paaßen'
__copyright__ = 'Copyright 2020, Benjamin Paaßen'
__license__ = 'GPLv3'
__version__ = '0.2.0'
__maintainer__ = 'Benjamin Paaßen'
__email__ = 'benjamin.paassen@sydney.edu.au'
import numpy as np
class Tree:
""" Models a tree as a recursive data structure with a label and
children.
Attributes
----------
_label: str
The label of this tree.
_children: list
The children of this tree. Should be a list of trees.
"""
def __init__(self, label, children = None):
self._label = label
if(children is None):
self._children = []
else:
self._children = children
def __str__(self):
out = str(self._label)
if not self._children:
return out
else:
child_strs = [str(child) for child in self._children]
return out + '(%s)' % ', '.join(child_strs)
def pretty_print(self, indent = 2, level = 0):
# the initial string is just the node label
if level > 0:
indent_str = (' ' * indent) * level
else:
indent_str = ''
tree_string = indent_str + str(self._label)
# consider the case where node i has children
if self._children:
# first, translate the children to their tree strings
children_strings = []
for child in self._children:
children_strings.append(child.pretty_print(indent, level + 1))
return tree_string + '(\n' + ',\n'.join(children_strings) + '\n' + indent_str + ')'
else:
return tree_string
def __repr__(self):
return self.__str__()
def __eq__(self, other):
if isinstance(other, Tree):
return self._label == other._label and self._children == other._children
return False
def size(self):
size = 1
for child in self._children:
size += child.size()
return size
def to_list_format(self):
""" Convers this tree to node list/adjacency list format via depth
first search.
Returns
-------
nodes: list
the node list.
adj: list
the adjacency list.
"""
# initialize node and adjacency list
nodes = []
adj = []
# perform the depth first search via a stack which stores
# the parent index and the current tree
stk = [(-1, self)]
while(stk):
p, tree = stk.pop()
i = len(nodes)
# append the label to the node list
nodes.append(tree._label)
# append a new empty child list
adj.append([])
# append the current node to its parent
if(p >= 0):
adj[p].append(i)
# push the children onto the stack
for c in range(len(tree._children)-1, -1, -1):
if(tree._children[c] is None):
continue
if(isinstance(tree._children[c], list)):
for c2 in range(len(tree._children[c]) -1, -1, -1):
stk.append((i, tree._children[c][c2]))
continue
stk.append((i, tree._children[c]))
return nodes, adj
# This code is copied from
# https://gitlab.ub.uni-bielefeld.de/bpaassen/python-edit-distances/-/blob/master/edist/tree_utils.py
def root(adj):
""" Returns the root of a tree and raises an error if the input adjacency
matrix does not correspond to a tree.
Parameters
----------
adj: list
a directed graph in adjacency list format.
Returns
-------
root: int
the index of the root of the tree.
Raises
------
ValueError
if the given adjacency list does not form a tree.
"""
if(not adj):
raise ValueError("The input tree is empty!")
par = np.full(len(adj), -1, dtype=int)
for i in range(len(adj)):
for j in adj[i]:
if(par[j] < 0):
par[j] = i
else:
raise ValueError("Input is not a tree because node %d has multiple parents" % j)
root = -1
for i in range(len(adj)):
if(par[i] < 0):
if(root < 0):
root = i
else:
raise ValueError("Input is not a tree because there is more than one root")
return root
def tree_to_string(nodes, adj, indent = False, with_indices = False):
""" Prints a tree in node list/adjacency list format as string.
Parameters
----------
nodes: list
The node list of the tree.
adj: list
The adjacency list of the tree.
indent: bool (default = False)
A boolean flag; if True, each node is printed on a new line.
with_indices: bool (default = False)
A boolean flag; if True, each node is printed with its index.
Raises
------
ValueError
if the adjacency list does not correspond to a tree.
"""
r = root(adj)
if(indent):
indent = 1
else:
indent = None
# translate recursively
return _tree_to_string(nodes, adj, r, indent, with_indices = with_indices)
def _tree_to_string(nodes, adj, i, indent = None, with_indices = False):
# the initial string is just the node label
if(with_indices):
tree_string = '%d: %s' % (i, str(nodes[i]))
else:
tree_string = str(nodes[i])
# consider the case where node i has children
if(adj[i]):
# first, translate the children to their tree strings
children_strings = []
for j in adj[i]:
if(indent is None):
children_strings.append(_tree_to_string(nodes, adj, j, with_indices = with_indices))
else:
children_strings.append(_tree_to_string(nodes, adj, j, indent + 1, with_indices = with_indices))
# and then join all these strings and append them in brackets
if(indent is None):
tree_string += '(' + ', '.join(children_strings) + ')'
else:
tree_string += '(\n' + ('\t' * indent) + (',\n' + '\t' * indent).join(children_strings) + '\n' + ('\t' * (indent - 1)) + ')'
return tree_string