-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathtext_operation.py
276 lines (221 loc) · 8.39 KB
/
text_operation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
# Operations are lists of ops. There are three types of ops:
#
# * Insert ops: insert a given string at the current cursor position.
# Represented by strings.
# * Retain ops: Advance the cursor position by a given number of characters.
# Represented by positive ints.
# * Delete ops: Delete the next n characters. Represented by negative ints.
def _is_retain(op):
return isinstance(op, int) and op > 0
def _is_delete(op):
return isinstance(op, int) and op < 0
def _is_insert(op):
return isinstance(op, str)
def _op_len(op):
if isinstance(op, str):
return len(op)
if op < 0:
return -op
return op
def _shorten(op, by):
if isinstance(op, str):
return op[by:]
if op < 0:
return op + by
return op - by
def _shorten_ops(a, b):
"""Shorten two ops by the part that cancels each other out."""
len_a = _op_len(a)
len_b = _op_len(b)
if len_a == len_b:
return (None, None)
if len_a > len_b:
return (_shorten(a, len_b), None)
return (None, _shorten(b, len_a))
class TextOperation(object):
"""Diff between two strings."""
def __init__(self, ops=[]):
self.ops = ops[:]
def __eq__(self, other):
return isinstance(other, TextOperation) and self.ops == other.ops
def __iter__(self):
return self.ops.__iter__()
def __add__(self, other):
return self.compose(other)
def len_difference(self):
"""Returns the difference in length between the input and the output
string when this operations is applied.
"""
s = 0
for op in self:
if isinstance(op, str):
s += len(op)
elif op < 0:
s += op
return s
def retain(self, r):
"""Skips a given number of characters at the current cursor position."""
if r == 0:
return self
if len(self.ops) > 0 and isinstance(self.ops[-1], int) and self.ops[-1] > 0:
self.ops[-1] += r
else:
self.ops.append(r)
return self
def insert(self, s):
"""Inserts the given string at the current cursor position."""
if len(s) == 0:
return self
if len(self.ops) > 0 and isinstance(self.ops[-1], str):
self.ops[-1] += s
elif len(self.ops) > 0 and isinstance(self.ops[-1], int) and self.ops[-1] < 0:
# It doesn't matter when an operation is applied whether the operation
# is delete(3), insert("something") or insert("something"), delete(3).
# Here we enforce that in this case, the insert op always comes first.
# This makes all operations that have the same effect when applied to
# a document of the right length equal in respect to the `equals` method.
if len(self.ops) > 1 and isinstance(self.ops[-2], str):
self.ops[-2] += s
else:
self.ops.append(self.ops[-1])
self.ops[-2] = s
else:
self.ops.append(s)
return self
def delete(self, d):
"""Deletes a given number of characters at the current cursor position."""
if d == 0:
return self
if d > 0:
d = -d
if len(self.ops) > 0 and isinstance(self.ops[-1], int) and self.ops[-1] < 0:
self.ops[-1] += d
else:
self.ops.append(d)
return self
def __call__(self, doc):
"""Apply this operation to a string, returning a new string."""
i = 0
parts = []
for op in self:
if _is_retain(op):
if i + op > len(doc):
raise Exception("Cannot apply operation: operation is too long.")
parts.append(doc[i:(i + op)])
i += op
elif _is_insert(op):
parts.append(op)
else:
i -= op
if i > len(doc):
raise IncompatibleOperationError("Cannot apply operation: operation is too long.")
if i != len(doc):
raise IncompatibleOperationError("Cannot apply operation: operation is too short.")
return ''.join(parts)
def invert(self, doc):
"""Make an operation that does the opposite. When you apply an operation
to a string and then the operation generated by this operation, you
end up with your original string. This method can be used to implement
undo.
"""
i = 0
inverse = TextOperation()
for op in self:
if _is_retain(op):
inverse.retain(op)
i += op
elif _is_insert(op):
inverse.delete(len(op))
else:
inverse.insert(doc[i:(i - op)])
i -= op
return inverse
def compose(self, other):
"""Combine two consecutive operations into one that has the same effect
when applied to a document.
"""
iter_a = iter(self)
iter_b = iter(other)
operation = TextOperation()
a = b = None
while True:
if a == None:
a = next(iter_a, None)
if b == None:
b = next(iter_b, None)
if a == b == None:
# end condition: both operations have been processed
break
if _is_delete(a):
operation.delete(a)
a = None
continue
if _is_insert(b):
operation.insert(b)
b = None
continue
if a == None:
raise IncompatibleOperationError("Cannot compose operations: first operation is too short")
if b == None:
raise IncompatibleOperationError("Cannot compose operations: first operation is too long")
min_len = min(_op_len(a), _op_len(b))
if _is_retain(a) and _is_retain(b):
operation.retain(min_len)
elif _is_insert(a) and _is_retain(b):
operation.insert(a[:min_len])
elif _is_retain(a) and _is_delete(b):
operation.delete(min_len)
# remaining case: _is_insert(a) and _is_delete(b)
# in this case the delete op deletes the text that has been added
# by the insert operation and we don't need to do anything
(a, b) = _shorten_ops(a, b)
return operation
@staticmethod
def transform(operation_a, operation_b):
"""Transform two operations a and b to a' and b' such that b' applied
after a yields the same result as a' applied after b. Try to preserve
the operations' intentions in the process.
"""
iter_a = iter(operation_a)
iter_b = iter(operation_b)
a_prime = TextOperation()
b_prime = TextOperation()
a = b = None
while True:
if a == None:
a = next(iter_a, None)
if b == None:
b = next(iter_b, None)
if a == b == None:
# end condition: both operations have been processed
break
if _is_insert(a):
a_prime.insert(a)
b_prime.retain(len(a))
a = None
continue
if _is_insert(b):
a_prime.retain(len(b))
b_prime.insert(b)
b = None
continue
if a == None:
raise IncompatibleOperationError("Cannot compose operations: first operation is too short")
if b == None:
raise IncompatibleOperationError("Cannot compose operations: first operation is too long")
min_len = min(_op_len(a), _op_len(b))
if _is_retain(a) and _is_retain(b):
a_prime.retain(min_len)
b_prime.retain(min_len)
elif _is_delete(a) and _is_retain(b):
a_prime.delete(min_len)
elif _is_retain(a) and _is_delete(b):
b_prime.delete(min_len)
# remaining case: _is_delete(a) and _is_delete(b)
# in this case both operations delete the same string and we don't
# need to do anything
(a, b) = _shorten_ops(a, b)
return (a_prime, b_prime)
class IncompatibleOperationError(Exception):
"""Two operations or an operation and a string have different lengths."""
pass