-
Notifications
You must be signed in to change notification settings - Fork 108
/
Copy pathbraces.py
224 lines (186 loc) · 7.1 KB
/
braces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#
# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
#
# SPDX-License-Identifier: BSD-2-Clause
#
from __future__ import print_function
class BracedString:
    """A string split into components based on delimiters (usually braces).

    When l occurs in the string, a new component is started whose contents
    run up to and including the matching r.

    When l = ( and r = ), this has the approximate behavior of splitting
    the string into the components of a Haskell function application,
    where each individual component, if not containing the delimiters, can
    be split on white space to determine the arguments of the function.

    The object imitates part of the str interface (==, !=, hashing,
    iteration, indexing, len, +, startswith, endswith) by delegating to the
    wrapped string; split, map, and discard_enclosing_braces are
    delimiter-aware.

    l and r are compared against individual characters of the string, so
    each has to be a single character. Unbalanced delimiters are not
    diagnosed.

    Invariant: a component either has no delimiters, or is surrounded by
    delimiters.

    Attributes:
        s: the wrapped text.
        l: the opening delimiter.
        r: the closing delimiter.
        bits: list of components (plain strings), in order.
    """

    def __init__(self, s, l, r, bits=None):
        """Wrap `s`, using `l` and `r` as opening and closing delimiters.

        `bits` may supply a precomputed component list; it is stored as
        given, without being checked against `s`. When omitted, the
        components are computed from `s`.
        """
        if bits is None:
            bits = self._get_bits(s, l, r)
        self.bits = bits
        self.s = s
        self.l = l
        self.r = r

    def _get_bits(self, s, l, r):
        """Return the list of components of `s` for delimiters `l` and `r`.

        Top-level delimited groups (nested groups stay inside their
        enclosing group) and the text between them become separate
        components. The empty string yields an empty list.
        """
        nesting_depth = 0
        bits = ['']
        for c in s:
            if c == l:
                # An opening delimiter at top level begins a new component,
                # unless the current one is still empty.
                if nesting_depth == 0 and bits[-1]:
                    bits.append('')
                nesting_depth = nesting_depth + 1
            bits[-1] = bits[-1] + c
            if c == r:
                nesting_depth = nesting_depth - 1
                # The closing delimiter that returns to top level ends the
                # component it belongs to.
                if nesting_depth == 0 and bits[-1]:
                    bits.append('')
        # Drop the trailing placeholder left when the string is empty or
        # ends with a closed group.
        if not bits[-1]:
            bits.pop(-1)
        return bits

    def __str__(self):
        return self.s

    def __repr__(self):
        # Recompute the components from scratch so that an inconsistent
        # `bits` list shows up in debugging output.
        check = BracedString(self.s, self.l, self.r)
        if check.bits == self.bits:
            return 'B%s%s: %r' % (self.l, self.r, self.s)
        else:
            return 'Broken Braced: %r, %r, %r' % (self.s, self.bits,
                                                  check.bits)

    def __add__(self, other):
        """Concatenate with another BracedString or with a plain string.

        The result keeps this object's delimiters. For a BracedString
        operand the component lists are combined directly; for anything
        else the components are recomputed from the concatenated text.
        """
        if isinstance(other, BracedString):
            if (not self.bits or not other.bits
                    or self.bits[-1].startswith(self.l)
                    or other.bits[0].startswith(self.l)):
                # Either side is empty, or a delimited component sits at
                # the seam: the component lists simply follow one another.
                bits = self.bits + other.bits
            else:
                # Two undelimited components meet at the seam and have to
                # be fused into a single component.
                bits = self.bits[:-1] + \
                    [self.bits[-1] + other.bits[0]] + \
                    other.bits[1:]
            return BracedString(self.s + other.s, self.l, self.r, bits)

        return BracedString(self.s + other, self.l, self.r)

    def __eq__(self, other):
        return other == self.s

    def __ne__(self, other):
        return other != self.s

    def __hash__(self):
        # Equality is decided by the wrapped string alone, so hashing
        # follows it; without this the class is unhashable.
        return hash(self.s)

    def __iter__(self):
        return iter(self.s)

    def __getitem__(self, n):
        return self.s[n]

    def __getslice__(self, i, j):
        # Only invoked implicitly by Python 2; plain slicing keeps an
        # explicit call working on Python 3 too, where str has no
        # __getslice__.
        return self.s[i:j]

    def __len__(self):
        return len(self.s)

    def split(self, str=None, num=-2, braces=False):
        """Split into multiple BracedStrings, using `str` as a delimiter, and
        into a maximum of `num` components.

        `str` and `num` are handed to the built-in string split, so `str`
        of None splits on white space. Any negative `num` (the default
        included) means "no limit".

        If `braces` is true (defaults to false), braces will also count as a
        delimiter, and each braced component will become a single element of
        the output.

        Otherwise, each braced pair will not be split into a separate
        component, but splitting will ignore the contents inside the
        delimiter.

        Returns a list of BracedString objects that share this object's
        delimiters.
        """
        bits = []
        bbs = []
        if braces:
            limited = num >= 0
            for bit in self.bits:
                # Number of further components the limit still allows.
                d = num + 1 - len(bits)
                if limited and d == 0:
                    # Limit reached: everything else joins the last piece.
                    bits[-1] = bits[-1] + bit
                    bbs[-1].append(bit)
                elif bit.startswith(self.l):
                    bits.append(bit)
                    bbs.append([bit])
                else:
                    if limited:
                        # NOTE(review): a split limited to d can return
                        # d + 1 pieces, after which d is negative and the
                        # limit is no longer enforced. Looks like an
                        # off-by-one; left as is until callers are checked.
                        n_bits = bit.split(str, d)
                    else:
                        n_bits = bit.split(str)
                    bits.extend(n_bits)
                    bbs.extend([[b] for b in n_bits])
        else:
            # `flat` is the original string, but with delimited substrings
            # replaced with just the delimiters.
            placeholder = self.l + self.r
            flat_parts = []
            internals = []
            for bit in self.bits:
                if bit.startswith(self.l):
                    flat_parts.append(placeholder)
                    internals.append(bit)
                else:
                    flat_parts.append(bit)
            flat = ''.join(flat_parts)
            # Reversed so that the next group to re-insert can be taken
            # with a cheap pop() from the end.
            internals.reverse()

            # Split on the separator, secure in the knowledge that it won't
            # mess up things inside delimiters.
            for piece in flat.split(str, num):
                # Split on the placeholders inserted above ...
                segments = piece.split(placeholder)
                meshed = [segments[0]]
                for segment in segments[1:]:
                    # ... and put back, where each placeholder was, the
                    # corresponding delimited contents, followed by the
                    # text that came after it.
                    meshed.append(internals.pop())
                    meshed.append(segment)
                # remove empty strings
                meshed = [m for m in meshed if m != '']
                bbs.append(meshed)
                bits.append(''.join(meshed))
        return [BracedString(bit, self.l, self.r, bb)
                for bit, bb in zip(bits, bbs)]

    def startswith(self, s):
        return self.s.startswith(s)

    def endswith(self, s):
        return self.s.endswith(s)

    def map(self, fn):
        """Apply a function to each delimited component of this string.

        `fn` receives the component with its first and last character (the
        delimiters) removed; the delimiters are put back around whatever it
        returns, so it has to return something a plain string can be
        concatenated with. Components that are not delimited are copied
        unchanged and `fn` is not called for them.

        Returns a new BracedString with the same delimiters.
        """
        new_bits = []
        for bit in self.bits:
            if bit.startswith(self.l):
                new_bits.append(self.l + fn(bit[1:-1]) + self.r)
            else:
                new_bits.append(bit)
        return BracedString(''.join(new_bits), self.l, self.r, new_bits)

    def discard_enclosing_braces(self):
        """If the string consists of one braced expression,
        discard the redundant enclosing braces. Otherwise
        return the string."""
        # Anything but exactly one component (the empty string has none)
        # cannot be a single braced expression.
        if len(self.bits) != 1:
            return self

        [bit] = self.bits

        if bit.startswith(self.l):
            return BracedString(bit[1:-1], self.l, self.r)
        else:
            return self
def clone(str, obj):
    """Re-wrap `str` using the delimiters of `obj`.

    When `obj` is a BracedString, the text of `str` (as given by its
    __str__ method) is wrapped in a new BracedString that has the
    delimiters of `obj`. For any other `obj`, `str` is returned as is.
    """
    if not isinstance(obj, BracedString):
        return str
    text = str.__str__()
    return BracedString(text, obj.l, obj.r)
# Bind the module-level name `str` to BracedString. From here on the builtin
# `str` is shadowed inside this module, and the alias is visible to any
# module that imports it from here.
# NOTE(review): presumably a convenience for client code that constructs
# braced strings -- confirm against the importing modules.
str = BracedString
if __name__ == '__main__':
    # Ad-hoc demonstration: print the results of splitting a sample
    # expression in several ways.
    sample = BracedString('a => b => c => (d => (e, f))', '(', ')')
    arrow = '=>'

    print(sample.split(arrow))
    print(sample.split(','))
    # The same split with an increasing limit on the number of splits.
    for limit in (1, 2, 3):
        print(limit, sample.split(arrow, limit))
    # Split on the arrow first, then each piece on white space.
    print([piece.split() for piece in sample.split(arrow)])